Add the rt linux 4.1.3-rt3 as base

Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author: Yunhong Jiang <yunhong.jiang@intel.com> 2015-08-04 12:17:53 -0700
committer: Yunhong Jiang <yunhong.jiang@intel.com> 2015-08-04 15:44:42 -0700
commit: 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree: 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/lib/proportions.c
parent: 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)
1 files changed, 407 insertions, 0 deletions
diff --git a/kernel/lib/proportions.c b/kernel/lib/proportions.c
new file mode 100644
index 000000000..6f724298f
--- /dev/null
+++ b/kernel/lib/proportions.c
@@ -0,0 +1,407 @@
+/*
+ * Floating proportions
+ *
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * The floating proportion is a time derivative with an exponentially decaying
+ * history:
+ *
+ *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
+ *
+ * Where j is an element from {prop_local}, x_{j} is j's number of events,
+ * and i the time period over which the differential is taken. So d/dt_{-i} is
+ * the differential over the i-th last period.
+ *
+ * The decaying history gives smooth transitions. The time differential carries
+ * the notion of speed.
+ *
+ * The denominator is 2^(1+i) because we want the series to be normalised, ie.
+ *
+ *   \Sum_{i=0} 1/2^(1+i) = 1
+ *
+ * Further more, if we measure time (t) in the same events as x; so that:
+ *
+ *   t = \Sum_{j} x_{j}
+ *
+ * we get that:
+ *
+ *   \Sum_{j} p_{j} = 1
+ *
+ * Writing this in an iterative fashion we get (dropping the 'd's):
+ *
+ *   if (++x_{j}, ++t > period)
+ *     t /= 2;
+ *     for_each (j)
+ *       x_{j} /= 2;
+ *
+ * so that:
+ *
+ *   p_{j} = x_{j} / t;
+ *
+ * We optimize away the '/= 2' for the global time delta by noting that:
+ *
+ *   if (++t > period) t /= 2:
+ *
+ * Can be approximated by:
+ *
+ *   period/2 + (++t % period/2)
+ *
+ * [ Furthermore, when we choose period to be 2^n it can be written in terms of
+ *   binary operations and wraparound artefacts disappear. ]
+ *
+ * Also note that this yields a natural counter of the elapsed periods:
+ *
+ *   c = t / (period/2)
+ *
+ * [ Its monotonic increasing property can be applied to mitigate the wrap-
+ *   around issue. ]
+ *
+ * This allows us to do away with the loop over all prop_locals on each period
+ * expiration. By remembering the period count under which it was last accessed
+ * as c_{j}, we can obtain the number of 'missed' cycles from:
+ *
+ *   c - c_{j}
+ *
+ * We can then lazily catch up to the global period count every time we are
+ * going to use x_{j}, by doing:
+ *
+ *   x_{j} /= 2^(c - c_{j}), c_{j} = c
+ */
+
+#include <linux/proportions.h>
+#include <linux/rcupdate.h>
+
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
+{
+	int err;
+
+	if (shift > PROP_MAX_SHIFT)
+		shift = PROP_MAX_SHIFT;
+
+	pd->index = 0;
+	pd->pg[0].shift = shift;
+	mutex_init(&pd->mutex);
+	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
+	if (err)
+		goto out;
+
+	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
+	if (err)
+		percpu_counter_destroy(&pd->pg[0].events);
+
+out:
+	return err;
+}
+
+/*
+ * We have two copies, and flip between them to make it seem like an atomic
+ * update. The update is not really atomic wrt the events counter, but
+ * it is internally consistent with the bit layout depending on shift.
+ *
+ * We copy the events count, move the bits around and flip the index.
+ */
+void prop_change_shift(struct prop_descriptor *pd, int shift)
+{
+	int index;
+	int offset;
+	u64 events;
+	unsigned long flags;
+
+	if (shift > PROP_MAX_SHIFT)
+		shift = PROP_MAX_SHIFT;
+
+	mutex_lock(&pd->mutex);
+
+	index = pd->index ^ 1;
+	offset = pd->pg[pd->index].shift - shift;
+	if (!offset)
+		goto out;
+
+	pd->pg[index].shift = shift;
+
+	local_irq_save(flags);
+	events = percpu_counter_sum(&pd->pg[pd->index].events);
+	if (offset < 0)
+		events <<= -offset;
+	else
+		events >>= offset;
+	percpu_counter_set(&pd->pg[index].events, events);
+
+	/*
+	 * ensure the new pg is fully written before the switch
+	 */
+	smp_wmb();
+	pd->index = index;
+	local_irq_restore(flags);
+
+	synchronize_rcu();
+
+out:
+	mutex_unlock(&pd->mutex);
+}
+
+/*
+ * wrap the access to the data in an rcu_read_lock() section;
+ * this is used to track the active references.
+ */
+static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+__acquires(RCU)
+{
+	int index;
+
+	rcu_read_lock();
+	index = pd->index;
+	/*
+	 * match the wmb from vcd_flip()
+	 */
+	smp_rmb();
+	return &pd->pg[index];
+}
+
+static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static void
+prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
+{
+	int offset = *pl_shift - new_shift;
+
+	if (!offset)
+		return;
+
+	if (offset < 0)
+		*pl_period <<= -offset;
+	else
+		*pl_period >>= offset;
+
+	*pl_shift = new_shift;
+}
+
+/*
+ * PERCPU
+ */
+
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
+{
+	raw_spin_lock_init(&pl->lock);
+	pl->shift = 0;
+	pl->period = 0;
+	return percpu_counter_init(&pl->events, 0, gfp);
+}
+
+void prop_local_destroy_percpu(struct prop_local_percpu *pl)
+{
+	percpu_counter_destroy(&pl->events);
+}
+
+/*
+ * Catch up with missed period expirations.
+ *
+ *   until (c_{j} == c)
+ *     x_{j} -= x_{j}/2;
+ *     c_{j}++;
+ */
+static
+void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
+{
+	unsigned long period = 1UL << (pg->shift - 1);
+	unsigned long period_mask = ~(period - 1);
+	unsigned long global_period;
+	unsigned long flags;
+
+	global_period = percpu_counter_read(&pg->events);
+	global_period &= period_mask;
+
+	/*
+	 * Fast path - check if the local and global period count still match
+	 * outside of the lock.
+	 */
+	if (pl->period == global_period)
+		return;
+
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+
+	/*
+	 * For each missed period, we half the local counter.
+	 * basically:
+	 *   pl->events >> (global_period - pl->period);
+	 */
+	period = (global_period - pl->period) >> (pg->shift - 1);
+	if (period < BITS_PER_LONG) {
+		s64 val = percpu_counter_read(&pl->events);
+
+		if (val < (nr_cpu_ids * PROP_BATCH))
+			val = percpu_counter_sum(&pl->events);
+
+		__percpu_counter_add(&pl->events, -val + (val >> period),
+					PROP_BATCH);
+	} else
+		percpu_counter_set(&pl->events, 0);
+
+	pl->period = global_period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ *   ++x_{j}, ++t
+ */
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+	struct prop_global *pg = prop_get_global(pd);
+
+	prop_norm_percpu(pg, pl);
+	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
+	percpu_counter_add(&pg->events, 1);
+	prop_put_global(pd, pg);
+}
+
+/*
+ * identical to __prop_inc_percpu, except that it limits this pl's fraction to
+ * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
+ */
+void __prop_inc_percpu_max(struct prop_descriptor *pd,
+			   struct prop_local_percpu *pl, long frac)
+{
+	struct prop_global *pg = prop_get_global(pd);
+
+	prop_norm_percpu(pg, pl);
+
+	if (unlikely(frac != PROP_FRAC_BASE)) {
+		unsigned long period_2 = 1UL << (pg->shift - 1);
+		unsigned long counter_mask = period_2 - 1;
+		unsigned long global_count;
+		long numerator, denominator;
+
+		numerator = percpu_counter_read_positive(&pl->events);
+		global_count = percpu_counter_read(&pg->events);
+		denominator = period_2 + (global_count & counter_mask);
+
+		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
+			goto out_put;
+	}
+
+	percpu_counter_add(&pl->events, 1);
+	percpu_counter_add(&pg->events, 1);
+
+out_put:
+	prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ *   p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_percpu(struct prop_descriptor *pd,
+		struct prop_local_percpu *pl,
+		long *numerator, long *denominator)
+{
+	struct prop_global *pg = prop_get_global(pd);
+	unsigned long period_2 = 1UL << (pg->shift - 1);
+	unsigned long counter_mask = period_2 - 1;
+	unsigned long global_count;
+
+	prop_norm_percpu(pg, pl);
+	*numerator = percpu_counter_read_positive(&pl->events);
+
+	global_count = percpu_counter_read(&pg->events);
+	*denominator = period_2 + (global_count & counter_mask);
+
+	prop_put_global(pd, pg);
+}
+
+/*
+ * SINGLE
+ */
+
+int prop_local_init_single(struct prop_local_single *pl)
+{
+	raw_spin_lock_init(&pl->lock);
+	pl->shift = 0;
+	pl->period = 0;
+	pl->events = 0;
+	return 0;
+}
+
+void prop_local_destroy_single(struct prop_local_single *pl)
+{
+}
+
+/*
+ * Catch up with missed period expirations.
+ */
+static
+void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
+{
+	unsigned long period = 1UL << (pg->shift - 1);
+	unsigned long period_mask = ~(period - 1);
+	unsigned long global_period;
+	unsigned long flags;
+
+	global_period = percpu_counter_read(&pg->events);
+	global_period &= period_mask;
+
+	/*
+	 * Fast path - check if the local and global period count still match
+	 * outside of the lock.
+	 */
+	if (pl->period == global_period)
+		return;
+
+	raw_spin_lock_irqsave(&pl->lock, flags);
+	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+	/*
+	 * For each missed period, we half the local counter.
+	 */
+	period = (global_period - pl->period) >> (pg->shift - 1);
+	if (likely(period < BITS_PER_LONG))
+		pl->events >>= period;
+	else
+		pl->events = 0;
+	pl->period = global_period;
+	raw_spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ *   ++x_{j}, ++t
+ */
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+	struct prop_global *pg = prop_get_global(pd);
+
+	prop_norm_single(pg, pl);
+	pl->events++;
+	percpu_counter_add(&pg->events, 1);
+	prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ *   p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_single(struct prop_descriptor *pd,
+	       	struct prop_local_single *pl,
+		long *numerator, long *denominator)
+{
+	struct prop_global *pg = prop_get_global(pd);
+	unsigned long period_2 = 1UL << (pg->shift - 1);
+	unsigned long counter_mask = period_2 - 1;
+	unsigned long global_count;
+
+	prop_norm_single(pg, pl);
+	*numerator = pl->events;
+
+	global_count = percpu_counter_read(&pg->events);
+	*denominator = period_2 + (global_count & counter_mask);
+
+	prop_put_global(pd, pg);
+}
author	Yunhong Jiang <yunhong.jiang@intel.com>	2015-08-04 12:17:53 -0700
committer	Yunhong Jiang <yunhong.jiang@intel.com>	2015-08-04 15:44:42 -0700
commit	9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree	1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/lib/proportions.c
parent	98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)