-rw-r--r--  charms/trusty/cassandra/hooks/charmhelpers/coordinator.py  607
1 file changed, 607 insertions(+), 0 deletions(-)
diff --git a/charms/trusty/cassandra/hooks/charmhelpers/coordinator.py b/charms/trusty/cassandra/hooks/charmhelpers/coordinator.py
new file mode 100644
index 0000000..0303c3f
--- /dev/null
+++ b/charms/trusty/cassandra/hooks/charmhelpers/coordinator.py
@@ -0,0 +1,607 @@
+# Copyright 2014-2015 Canonical Limited.
+#
+# This file is part of charm-helpers.
+#
+# charm-helpers is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3 as
+# published by the Free Software Foundation.
+#
+# charm-helpers is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
+'''
+The coordinator module allows you to use Juju's leadership feature to
+coordinate operations between units of a service.
+
+Behavior is defined in subclasses of coordinator.BaseCoordinator.
+One implementation is provided (coordinator.Serial), which allows an
+operation to be run on a single unit at a time, on a first-come,
+first-served basis. You can trivially define more complex behavior by
+subclassing BaseCoordinator or Serial.
+
+:author: Stuart Bishop <stuart.bishop@canonical.com>
+
+
+Services Framework Usage
+========================
+
+Ensure a peers relation is defined in metadata.yaml. Instantiate a
+BaseCoordinator subclass before invoking ServiceManager.manage().
+Ensure that ServiceManager.manage() is wired up to the leader-elected,
+leader-settings-changed, peers relation-changed and peers
+relation-departed hooks in addition to any other hooks you need, or your
+service will deadlock.
+
+Ensure calls to acquire() are guarded, so that locks are only requested
+when they are really needed (and thus hooks only triggered when necessary).
+Failing to do this and calling acquire() unconditionally will put your unit
+into a hook loop. Calls to granted() do not need to be guarded.
+
+For example::
+
+ from charmhelpers.core import hookenv, services
+ from charmhelpers import coordinator
+
+ def maybe_restart(servicename):
+ serial = coordinator.Serial()
+ if needs_restart():
+ serial.acquire('restart')
+ if serial.granted('restart'):
+ hookenv.service_restart(servicename)
+
+ services = [dict(service='servicename',
+ data_ready=[maybe_restart])]
+
+ if __name__ == '__main__':
+ _ = coordinator.Serial() # Must instantiate before manager.manage()
+ manager = services.ServiceManager(services)
+ manager.manage()
+
+
+You can implement a similar pattern using a decorator. If the lock has
+not been granted, an attempt to acquire() it will be made if the guard
+function returns True. If the lock has been granted, the decorated function
+is run as normal::
+
+ from charmhelpers.core import hookenv, services
+ from charmhelpers import coordinator
+
+    serial = coordinator.Serial()  # Global, instantiated on module import.
+
+ def needs_restart():
+ [ ... Introspect state. Return True if restart is needed ... ]
+
+ @serial.require('restart', needs_restart)
+ def maybe_restart(servicename):
+ hookenv.service_restart(servicename)
+
+ services = [dict(service='servicename',
+ data_ready=[maybe_restart])]
+
+ if __name__ == '__main__':
+ manager = services.ServiceManager(services)
+ manager.manage()
+
+
+Traditional Usage
+=================
+
+Ensure a peers relation is defined in metadata.yaml.
+
+If you are using charmhelpers.core.hookenv.Hooks, ensure that a
+BaseCoordinator subclass is instantiated before calling Hooks.execute.
+
+If you are not using charmhelpers.core.hookenv.Hooks, ensure
+that a BaseCoordinator subclass is instantiated and its handle()
+method called at the start of all your hooks.
+
+For example::
+
+ import sys
+ from charmhelpers.core import hookenv
+ from charmhelpers import coordinator
+
+ hooks = hookenv.Hooks()
+
+ def maybe_restart():
+ serial = coordinator.Serial()
+ if serial.granted('restart'):
+ hookenv.service_restart('myservice')
+
+ @hooks.hook
+ def config_changed():
+ update_config()
+ serial = coordinator.Serial()
+ if needs_restart():
+            serial.acquire('restart')
+ maybe_restart()
+
+ # Cluster hooks must be wired up.
+ @hooks.hook('cluster-relation-changed', 'cluster-relation-departed')
+ def cluster_relation_changed():
+ maybe_restart()
+
+ # Leader hooks must be wired up.
+ @hooks.hook('leader-elected', 'leader-settings-changed')
+ def leader_settings_changed():
+ maybe_restart()
+
+ [ ... repeat for *all* other hooks you are using ... ]
+
+ if __name__ == '__main__':
+ _ = coordinator.Serial() # Must instantiate before execute()
+ hooks.execute(sys.argv)
+
+
+You can also use the require decorator. If the lock has not been granted,
+an attempt to acquire() it will be made if the guard function returns True.
+If the lock has been granted, the decorated function is run as normal::
+
+    import sys
+    from charmhelpers.core import hookenv
+    from charmhelpers import coordinator
+
+ hooks = hookenv.Hooks()
+ serial = coordinator.Serial() # Must instantiate before execute()
+
+    @serial.require('restart', needs_restart)
+ def maybe_restart():
+ hookenv.service_restart('myservice')
+
+ @hooks.hook('install', 'config-changed', 'upgrade-charm',
+ # Peers and leader hooks must be wired up.
+ 'cluster-relation-changed', 'cluster-relation-departed',
+ 'leader-elected', 'leader-settings-changed')
+ def default_hook():
+ [...]
+ maybe_restart()
+
+ if __name__ == '__main__':
+        hooks.execute(sys.argv)
+
+
+Details
+=======
+
+A simple API is provided similar to traditional locking APIs. A lock
+may be requested using the acquire() method, and the granted() method
+may be used to check whether a lock previously requested by acquire() has
+been granted. It doesn't matter how many times acquire() is called in a
+hook.
+
+Locks are released at the end of the hook they are acquired in. This may
+be the current hook if the unit is leader and the lock is free. It is
+more likely a future hook (probably leader-settings-changed, possibly
+the peers relation-changed or departed hook, potentially any hook).
+
+Whenever a charm needs to perform a coordinated action it will acquire()
+the lock and perform the action immediately if acquisition is
+successful. It will also need to perform the same action in every other
+hook if the lock has been granted.
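+
+For example, the same guarded pattern works in any hook (a minimal
+sketch; needs_upgrade() and perform_upgrade() are hypothetical
+charm-specific helpers)::
+
+    serial = coordinator.Serial()
+    if needs_upgrade():
+        serial.acquire('upgrade')    # Guarded request.
+    if serial.granted('upgrade'):
+        perform_upgrade()            # Runs in whichever hook grants it.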
+
+
+Grubby Details
+--------------
+
+Why do you need to be able to perform the same action in every hook?
+If the unit is the leader, then it may be able to grant its own lock
+and perform the action immediately in the source hook. If the unit is
+the leader and cannot immediately grant the lock, then its only
+guaranteed chance of acquiring the lock is in the peers relation-joined,
+relation-changed or peers relation-departed hooks when another unit has
+released it (the only channel to communicate to the leader is the peers
+relation). If the unit is not the leader, then it is unlikely the lock
+is granted in the source hook (a previous hook must have also made the
+request for this to happen). A non-leader is notified about the lock via
+leader settings. These changes may be visible in any hook, even before
+the leader-settings-changed hook has been invoked. Or the requesting
+unit may be promoted to leader after making a request, in which case the
+lock may be granted in leader-elected or in a future peers
+relation-changed or relation-departed hook.
+
+This could be simpler if leader-settings-changed was invoked on the
+leader. We could then never grant locks except in
+leader-settings-changed hooks giving one place for the operation to be
+performed. Unfortunately this is not the case with Juju 1.23 leadership.
+
+But of course, this doesn't really matter to most people, who seem
+to prefer the Services Framework or similar reset-the-world
+approaches, rather than the twisty maze of attempting to deduce what
+should be done based on what hook happens to be running (which always
+seems to evolve into reset-the-world anyway when the charm grows beyond
+the trivial).
+
+I chose not to implement a callback model, where a callback was passed
+to acquire to be executed when the lock is granted, because the callback
+may become invalid between making the request and the lock being granted
+due to an upgrade-charm being run in the interim. And it would create
+restrictions, such as no lambdas and callbacks needing to be defined at
+the top level of a module. Still, we could implement it on top of what
+is here, eg. by adding a defer decorator that stores a pickle of itself
+to disk and having BaseCoordinator unpickle and execute the callbacks
+when the locks are granted.
+'''
+from datetime import datetime
+from functools import wraps
+import json
+import os.path
+
+from six import with_metaclass
+
+from charmhelpers.core import hookenv
+
+
+# We make BaseCoordinator and subclasses singletons, so that if we
+# need to spill to local storage then only a single instance does so,
+# rather than having multiple instances stomp over each other.
+class Singleton(type):
+ _instances = {}
+
+ def __call__(cls, *args, **kwargs):
+ if cls not in cls._instances:
+ cls._instances[cls] = super(Singleton, cls).__call__(*args,
+ **kwargs)
+ return cls._instances[cls]
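+
+# Illustration (not part of the original module): because of this
+# metaclass, repeated construction of the same subclass returns one
+# shared object, e.g. Serial() is Serial() holds within a hook.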
+
+
+class BaseCoordinator(with_metaclass(Singleton, object)):
+ relid = None # Peer relation-id, set by __init__
+ relname = None
+
+ grants = None # self.grants[unit][lock] == timestamp
+ requests = None # self.requests[unit][lock] == timestamp
+
+ def __init__(self, relation_key='coordinator', peer_relation_name=None):
+        '''Instantiate a Coordinator.
+
+ Data is stored on the peers relation and in leadership storage
+ under the provided relation_key.
+
+ The peers relation is identified by peer_relation_name, and defaults
+ to the first one found in metadata.yaml.
+ '''
+ # Most initialization is deferred, since invoking hook tools from
+ # the constructor makes testing hard.
+ self.key = relation_key
+ self.relname = peer_relation_name
+ hookenv.atstart(self.initialize)
+
+ # Ensure that handle() is called, without placing that burden on
+ # the charm author. They still need to do this manually if they
+ # are not using a hook framework.
+ hookenv.atstart(self.handle)
+
+ def initialize(self):
+ if self.requests is not None:
+ return # Already initialized.
+
+ assert hookenv.has_juju_version('1.23'), 'Needs Juju 1.23+'
+
+ if self.relname is None:
+ self.relname = _implicit_peer_relation_name()
+
+ relids = hookenv.relation_ids(self.relname)
+ if relids:
+ self.relid = sorted(relids)[0]
+
+        # Load our state from leadership settings, the peers relation,
+        # and maybe local state as a fallback. Populates self.requests
+        # and self.grants.
+ self._load_state()
+ self._emit_state()
+
+ # Save our state if the hook completes successfully.
+ hookenv.atexit(self._save_state)
+
+ # Schedule release of granted locks for the end of the hook.
+ # This needs to be the last of our atexit callbacks to ensure
+ # it will be run first when the hook is complete, because there
+ # is no point mutating our state after it has been saved.
+ hookenv.atexit(self._release_granted)
+
+ def acquire(self, lock):
+ '''Acquire the named lock, non-blocking.
+
+ The lock may be granted immediately, or in a future hook.
+
+ Returns True if the lock has been granted. The lock will be
+ automatically released at the end of the hook in which it is
+ granted.
+
+ Do not mindlessly call this method, as it triggers a cascade of
+ hooks. For example, if you call acquire() every time in your
+ peers relation-changed hook you will end up with an infinite loop
+ of hooks. It should almost always be guarded by some condition.
+ '''
+ unit = hookenv.local_unit()
+ ts = self.requests[unit].get(lock)
+ if not ts:
+ # If there is no outstanding request on the peers relation,
+ # create one.
+            self.requests.setdefault(unit, {})
+ self.requests[unit][lock] = _timestamp()
+ self.msg('Requested {}'.format(lock))
+
+ # If the leader has granted the lock, yay.
+ if self.granted(lock):
+ self.msg('Acquired {}'.format(lock))
+ return True
+
+ # If the unit making the request also happens to be the
+ # leader, it must handle the request now. Even though the
+ # request has been stored on the peers relation, the peers
+ # relation-changed hook will not be triggered.
+ if hookenv.is_leader():
+ return self.grant(lock, unit)
+
+ return False # Can't acquire lock, yet. Maybe next hook.
+
+ def granted(self, lock):
+ '''Return True if a previously requested lock has been granted'''
+ unit = hookenv.local_unit()
+ ts = self.requests[unit].get(lock)
+ if ts and self.grants.get(unit, {}).get(lock) == ts:
+ return True
+ return False
+
+ def requested(self, lock):
+ '''Return True if we are in the queue for the lock'''
+ return lock in self.requests[hookenv.local_unit()]
+
+ def request_timestamp(self, lock):
+ '''Return the timestamp of our outstanding request for lock, or None.
+
+ Returns a datetime.datetime() UTC timestamp, with no tzinfo attribute.
+ '''
+ ts = self.requests[hookenv.local_unit()].get(lock, None)
+ if ts is not None:
+ return datetime.strptime(ts, _timestamp_format)
+
+ def handle(self):
+ if not hookenv.is_leader():
+ return # Only the leader can grant requests.
+
+ self.msg('Leader handling coordinator requests')
+
+ # Clear our grants that have been released.
+ for unit in self.grants.keys():
+ for lock, grant_ts in list(self.grants[unit].items()):
+ req_ts = self.requests.get(unit, {}).get(lock)
+ if req_ts != grant_ts:
+ # The request timestamp does not match the granted
+ # timestamp. Several hooks on 'unit' may have run
+ # before the leader got a chance to make a decision,
+ # and 'unit' may have released its lock and attempted
+ # to reacquire it. This will change the timestamp,
+                    # and we correctly revoke the old grant, putting the
+                    # fresh request at the end of the queue.
+ ts = datetime.strptime(self.grants[unit][lock],
+ _timestamp_format)
+ del self.grants[unit][lock]
+ self.released(unit, lock, ts)
+
+ # Grant locks
+ for unit in self.requests.keys():
+ for lock in self.requests[unit]:
+ self.grant(lock, unit)
+
+ def grant(self, lock, unit):
+ '''Maybe grant the lock to a unit.
+
+ The decision to grant the lock or not is made for $lock
+ by a corresponding method grant_$lock, which you may define
+ in a subclass. If no such method is defined, the default_grant
+ method is used. See Serial.default_grant() for details.
+ '''
+ if not hookenv.is_leader():
+ return False # Not the leader, so we cannot grant.
+
+ # Set of units already granted the lock.
+ granted = set()
+ for u in self.grants:
+ if lock in self.grants[u]:
+ granted.add(u)
+ if unit in granted:
+ return True # Already granted.
+
+ # Ordered list of units waiting for the lock.
+ reqs = set()
+ for u in self.requests:
+ if u in granted:
+ continue # In the granted set. Not wanted in the req list.
+ for l, ts in self.requests[u].items():
+ if l == lock:
+ reqs.add((ts, u))
+ queue = [t[1] for t in sorted(reqs)]
+ if unit not in queue:
+ return False # Unit has not requested the lock.
+
+ # Locate custom logic, or fallback to the default.
+ grant_func = getattr(self, 'grant_{}'.format(lock), self.default_grant)
+
+ if grant_func(lock, unit, granted, queue):
+ # Grant the lock.
+ self.msg('Leader grants {} to {}'.format(lock, unit))
+ self.grants.setdefault(unit, {})[lock] = self.requests[unit][lock]
+ return True
+
+ return False
+
+ def released(self, unit, lock, timestamp):
+ '''Called on the leader when it has released a lock.
+
+ By default, does nothing but log messages. Override if you
+ need to perform additional housekeeping when a lock is released,
+ for example recording timestamps.
+ '''
+ interval = _utcnow() - timestamp
+ self.msg('Leader released {} from {}, held {}'.format(lock, unit,
+ interval))
+
+ def require(self, lock, guard_func, *guard_args, **guard_kw):
+ """Decorate a function to be run only when a lock is acquired.
+
+ The lock is requested if the guard function returns True.
+
+ The decorated function is called if the lock has been granted.
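+
+        Extra positional and keyword arguments are forwarded to the
+        guard function. For example (config_newer_than() is a
+        hypothetical helper)::
+
+            @serial.require('restart', config_newer_than, '/etc/app.conf')
+            def maybe_restart():
+                ...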
+ """
+ def decorator(f):
+ @wraps(f)
+ def wrapper(*args, **kw):
+ if self.granted(lock):
+ self.msg('Granted {}'.format(lock))
+ return f(*args, **kw)
+ if guard_func(*guard_args, **guard_kw) and self.acquire(lock):
+ return f(*args, **kw)
+ return None
+ return wrapper
+ return decorator
+
+ def msg(self, msg):
+ '''Emit a message. Override to customize log spam.'''
+ hookenv.log('coordinator.{} {}'.format(self._name(), msg),
+ level=hookenv.INFO)
+
+ def _name(self):
+ return self.__class__.__name__
+
+ def _load_state(self):
+        self.msg('Loading state')
+
+ # All responses must be stored in the leadership settings.
+ # The leader cannot use local state, as a different unit may
+ # be leader next time. Which is fine, as the leadership
+ # settings are always available.
+ self.grants = json.loads(hookenv.leader_get(self.key) or '{}')
+
+ local_unit = hookenv.local_unit()
+
+ # All requests must be stored on the peers relation. This is
+ # the only channel units have to communicate with the leader.
+ # Even the leader needs to store its requests here, as a
+ # different unit may be leader by the time the request can be
+ # granted.
+ if self.relid is None:
+ # The peers relation is not available. Maybe we are early in
+            # the unit's lifecycle. Maybe this unit is standalone.
+ # Fallback to using local state.
+ self.msg('No peer relation. Loading local state')
+ self.requests = {local_unit: self._load_local_state()}
+ else:
+ self.requests = self._load_peer_state()
+ if local_unit not in self.requests:
+ # The peers relation has just been joined. Update any state
+ # loaded from our peers with our local state.
+ self.msg('New peer relation. Merging local state')
+ self.requests[local_unit] = self._load_local_state()
+
+ def _emit_state(self):
+        # Emit this unit's lock status.
+ for lock in sorted(self.requests[hookenv.local_unit()].keys()):
+ if self.granted(lock):
+ self.msg('Granted {}'.format(lock))
+ else:
+ self.msg('Waiting on {}'.format(lock))
+
+ def _save_state(self):
+        self.msg('Publishing state')
+ if hookenv.is_leader():
+ # sort_keys to ensure stability.
+ raw = json.dumps(self.grants, sort_keys=True)
+ hookenv.leader_set({self.key: raw})
+
+ local_unit = hookenv.local_unit()
+
+ if self.relid is None:
+ # No peers relation yet. Fallback to local state.
+ self.msg('No peer relation. Saving local state')
+ self._save_local_state(self.requests[local_unit])
+ else:
+ # sort_keys to ensure stability.
+ raw = json.dumps(self.requests[local_unit], sort_keys=True)
+ hookenv.relation_set(self.relid, relation_settings={self.key: raw})
+
+ def _load_peer_state(self):
+ requests = {}
+ units = set(hookenv.related_units(self.relid))
+ units.add(hookenv.local_unit())
+ for unit in units:
+ raw = hookenv.relation_get(self.key, unit, self.relid)
+ if raw:
+ requests[unit] = json.loads(raw)
+ return requests
+
+ def _local_state_filename(self):
+ # Include the class name. We allow multiple BaseCoordinator
+ # subclasses to be instantiated, and they are singletons, so
+ # this avoids conflicts (unless someone creates and uses two
+ # BaseCoordinator subclasses with the same class name, so don't
+ # do that).
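+        # The result is e.g. '.charmhelpers.coordinator.Serial'.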
+ return '.charmhelpers.coordinator.{}'.format(self._name())
+
+ def _load_local_state(self):
+ fn = self._local_state_filename()
+ if os.path.exists(fn):
+ with open(fn, 'r') as f:
+ return json.load(f)
+ return {}
+
+ def _save_local_state(self, state):
+ fn = self._local_state_filename()
+ with open(fn, 'w') as f:
+ json.dump(state, f)
+
+ def _release_granted(self):
+ # At the end of every hook, release all locks granted to
+ # this unit. If a hook neglects to make use of what it
+ # requested, it will just have to make the request again.
+        # Implicit release is the only way this will work: if the
+        # unit is standalone, there may be no future hooks triggered
+        # in which to perform a manual release.
+ unit = hookenv.local_unit()
+ for lock in list(self.requests[unit].keys()):
+ if self.granted(lock):
+ self.msg('Released local {} lock'.format(lock))
+ del self.requests[unit][lock]
+
+
+class Serial(BaseCoordinator):
+ def default_grant(self, lock, unit, granted, queue):
+ '''Default logic to grant a lock to a unit. Unless overridden,
+ only one unit may hold the lock and it will be granted to the
+ earliest queued request.
+
+ To define custom logic for $lock, create a subclass and
+ define a grant_$lock method.
+
+ `unit` is the unit name making the request.
+
+ `granted` is the set of units already granted the lock. It will
+ never include `unit`. It may be empty.
+
+ `queue` is the list of units waiting for the lock, ordered by time
+ of request. It will always include `unit`, but `unit` is not
+ necessarily first.
+
+ Returns True if the lock should be granted to `unit`.
+ '''
+ return unit == queue[0] and not granted
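+
+
+# A hypothetical sketch (not part of charm-helpers) of custom per-lock
+# logic: grant_repair() overrides the default for the 'repair' lock,
+# allowing up to two units to hold it concurrently, while other locks
+# still fall back to Serial.default_grant().
+class ExampleParallel(Serial):
+    def grant_repair(self, lock, unit, granted, queue):
+        # grant() recomputes `granted` and `queue` for every call, so
+        # granting the head of the queue while fewer than two grants
+        # exist caps the lock at two concurrent holders.
+        return len(granted) < 2 and unit == queue[0]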
+
+
+def _implicit_peer_relation_name():
+ md = hookenv.metadata()
+ assert 'peers' in md, 'No peer relations in metadata.yaml'
+ return sorted(md['peers'].keys())[0]
+
+
+# A human-readable, sortable UTC timestamp format.
+_timestamp_format = '%Y-%m-%d %H:%M:%S.%fZ'
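+# e.g. '2015-01-30 10:11:12.345678Z' (illustrative value).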
+
+
+def _utcnow(): # pragma: no cover
+ # This wrapper exists as mocking datetime methods is problematic.
+ return datetime.utcnow()
+
+
+def _timestamp():
+ return _utcnow().strftime(_timestamp_format)