From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001
From: José Pekkarinen <jose.pekkarinen@nokia.com>
Date: Mon, 11 Apr 2016 10:41:07 +0300
Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources
 are taken from kernel.org, and rt patch from the rt wiki download page.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the rebasing, the following patch collided:

Force tick interrupt and get rid of softirq magic(I70131fb85).

Collisions have been removed because its logic was found on the
source already.

Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
---
 kernel/arch/powerpc/perf/callchain.c   |   2 +-
 kernel/arch/powerpc/perf/core-book3s.c |  40 +++++--
 kernel/arch/powerpc/perf/hv-24x7.c     | 192 ++++++++++++++++++++++++++++++---
 kernel/arch/powerpc/perf/power8-pmu.c  |   3 +
 4 files changed, 213 insertions(+), 24 deletions(-)

(limited to 'kernel/arch/powerpc/perf')

diff --git a/kernel/arch/powerpc/perf/callchain.c b/kernel/arch/powerpc/perf/callchain.c
index ff09cde20..e04a6752b 100644
--- a/kernel/arch/powerpc/perf/callchain.c
+++ b/kernel/arch/powerpc/perf/callchain.c
@@ -127,7 +127,7 @@ static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 		return -EFAULT;
 
 	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	ptep = find_linux_pte_or_hugepte(pgdir, addr, NULL, &shift);
 	if (!ptep)
 		goto err_out;
 	if (!shift)
diff --git a/kernel/arch/powerpc/perf/core-book3s.c b/kernel/arch/powerpc/perf/core-book3s.c
index d90893b76..d1e65ce54 100644
--- a/kernel/arch/powerpc/perf/core-book3s.c
+++ b/kernel/arch/powerpc/perf/core-book3s.c
@@ -48,12 +48,12 @@ struct cpu_hw_events {
 	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 
-	unsigned int group_flag;
+	unsigned int txn_flags;
 	int n_txn_start;
 
 	/* BHRB bits */
 	u64				bhrb_filter;	/* BHRB HW branch filter */
-	int				bhrb_users;
+	unsigned int			bhrb_users;
 	void				*bhrb_context;
 	struct	perf_branch_stack	bhrb_stack;
 	struct	perf_branch_entry	bhrb_entries[BHRB_MAX_ENTRIES];
@@ -369,8 +369,8 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 	if (!ppmu->bhrb_nr)
 		return;
 
+	WARN_ON_ONCE(!cpuhw->bhrb_users);
 	cpuhw->bhrb_users--;
-	WARN_ON_ONCE(cpuhw->bhrb_users < 0);
 	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
@@ -1441,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 	 * skip the schedulability test here, it will be performed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN)
+	if (cpuhw->txn_flags & PERF_PMU_TXN_ADD)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -1586,13 +1586,22 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags)
  * Start group events scheduling transaction
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void power_pmu_start_txn(struct pmu *pmu)
+static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+
+	cpuhw->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	perf_pmu_disable(pmu);
-	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -1604,8 +1613,15 @@ static void power_pmu_start_txn(struct pmu *pmu)
 static void power_pmu_cancel_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	unsigned int txn_flags;
+
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	perf_pmu_enable(pmu);
 }
 
@@ -1621,7 +1637,15 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 
 	if (!ppmu)
 		return -EAGAIN;
+
 	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuhw->txn_flags = 0;
+		return 0;
+	}
+
 	n = cpuhw->n_events;
 	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
 		return -EAGAIN;
@@ -1632,7 +1656,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	cpuhw->txn_flags = 0;
 	perf_pmu_enable(pmu);
 	return 0;
 }
diff --git a/kernel/arch/powerpc/perf/hv-24x7.c b/kernel/arch/powerpc/perf/hv-24x7.c
index ec2eb2063..9f9dfda9e 100644
--- a/kernel/arch/powerpc/perf/hv-24x7.c
+++ b/kernel/arch/powerpc/perf/hv-24x7.c
@@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
+DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+DEFINE_PER_CPU(int, hv_24x7_txn_err);
+
+struct hv_24x7_hw {
+	struct perf_event *events[255];
+};
+
+DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+
 /*
  * request_buffer and result_buffer are not required to be 4k aligned,
  * but are not allowed to cross any 4k boundary. Aligning them to 4k is
@@ -320,6 +329,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str)
 	if (!attr)
 		return NULL;
 
+	sysfs_attr_init(&attr->attr.attr);
+
 	attr->var = str;
 	attr->attr.attr.name = name;
 	attr->attr.attr.mode = 0444;
@@ -414,7 +425,7 @@ out_val:
 }
 
 static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
-				int nonce)
+					    int nonce)
 {
 	int nl, dl;
 	char *name = event_name(event, &nl);
@@ -442,7 +453,7 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
 }
 
 static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
-		struct hv_24x7_event_data *event, int nonce)
+				   struct hv_24x7_event_data *event, int nonce)
 {
 	unsigned i;
 
@@ -510,7 +521,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
 }
 
 static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
-					size_t s2, unsigned d2)
+		       size_t s2, unsigned d2)
 {
 	int r = memord(v1, s1, v2, s2);
 
@@ -524,7 +535,7 @@ static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
 }
 
 static int event_uniq_add(struct rb_root *root, const char *name, int nl,
-				unsigned domain)
+			  unsigned domain)
 {
 	struct rb_node **new = &(root->rb_node), *parent = NULL;
 	struct event_uniq *data;
@@ -648,8 +659,8 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
 #define MAX_4K (SIZE_MAX / 4096)
 
 static int create_events_from_catalog(struct attribute ***events_,
-		struct attribute ***event_descs_,
-		struct attribute ***event_long_descs_)
+				      struct attribute ***event_descs_,
+				      struct attribute ***event_long_descs_)
 {
 	unsigned long hret;
 	size_t catalog_len, catalog_page_len, event_entry_count,
@@ -1006,8 +1017,8 @@ static const struct attribute_group *attr_groups[] = {
 };
 
 static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
-			struct hv_24x7_data_result_buffer *result_buffer,
-			unsigned long ret)
+			   struct hv_24x7_data_result_buffer *result_buffer,
+			   unsigned long ret)
 {
 	struct hv_24x7_request *req;
 
@@ -1024,7 +1035,7 @@ static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
  * Start the process for a new H_GET_24x7_DATA hcall.
  */
 static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
-			struct hv_24x7_data_result_buffer *result_buffer)
+			      struct hv_24x7_data_result_buffer *result_buffer)
 {
 
 	memset(request_buffer, 0, 4096);
@@ -1039,7 +1050,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
  * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
  */
 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
-			struct hv_24x7_data_result_buffer *result_buffer)
+			     struct hv_24x7_data_result_buffer *result_buffer)
 {
 	unsigned long ret;
 
@@ -1102,7 +1113,6 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
 	unsigned long ret;
 	struct hv_24x7_request_buffer *request_buffer;
 	struct hv_24x7_data_result_buffer *result_buffer;
-	struct hv_24x7_result *resb;
 
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
@@ -1123,8 +1133,7 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
 	}
 
 	/* process result from hcall */
-	resb = &result_buffer->results[0];
-	*count = be64_to_cpu(resb->elements[0].element_data[0]);
+	*count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
 
 out:
 	put_cpu_var(hv_24x7_reqb);
@@ -1231,9 +1240,48 @@ static void update_event_count(struct perf_event *event, u64 now)
 static void h_24x7_event_read(struct perf_event *event)
 {
 	u64 now;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_hw *h24x7hw;
+	int txn_flags;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+
+	/*
+	 * If in a READ transaction, add this counter to the list of
+	 * counters to read during the next HCALL (i.e commit_txn()).
+	 * If not in a READ transaction, go ahead and make the HCALL
+	 * to read this counter by itself.
+	 */
+
+	if (txn_flags & PERF_PMU_TXN_READ) {
+		int i;
+		int ret;
+
+		if (__this_cpu_read(hv_24x7_txn_err))
+			return;
+
+		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+
+		ret = add_event_to_24x7_request(event, request_buffer);
+		if (ret) {
+			__this_cpu_write(hv_24x7_txn_err, ret);
+		} else {
+			/*
+			 * Assoicate the event with the HCALL request index,
+			 * so ->commit_txn() can quickly find/update count.
+			 */
+			i = request_buffer->num_requests - 1;
+
+			h24x7hw = &get_cpu_var(hv_24x7_hw);
+			h24x7hw->events[i] = event;
+			put_cpu_var(h24x7hw);
+		}
 
-	now = h_24x7_get_value(event);
-	update_event_count(event, now);
+		put_cpu_var(hv_24x7_reqb);
+	} else {
+		now = h_24x7_get_value(event);
+		update_event_count(event, now);
+	}
 }
 
 static void h_24x7_event_start(struct perf_event *event, int flags)
@@ -1255,6 +1303,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags)
 	return 0;
 }
 
+/*
+ * 24x7 counters only support READ transactions. They are
+ * always counting and dont need/support ADD transactions.
+ * Cache the flags, but otherwise ignore transactions that
+ * are not PERF_PMU_TXN_READ.
+ */
+static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+
+	/* We should not be called if we are already in a txn */
+	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
+
+	__this_cpu_write(hv_24x7_txn_flags, flags);
+	if (flags & ~PERF_PMU_TXN_READ)
+		return;
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	init_24x7_request(request_buffer, result_buffer);
+
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+}
+
+/*
+ * Clean up transaction state.
+ *
+ * NOTE: Ignore state of request and result buffers for now.
+ *	 We will initialize them during the next read/txn.
+ */
+static void reset_txn(void)
+{
+	__this_cpu_write(hv_24x7_txn_flags, 0);
+	__this_cpu_write(hv_24x7_txn_err, 0);
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and dont need/support ADD transactions. Clear ->txn_flags but otherwise
+ * ignore transactions that are not of type PERF_PMU_TXN_READ.
+ *
+ * For READ transactions, submit all pending 24x7 requests (i.e requests
+ * that were queued by h_24x7_event_read()), to the hypervisor and update
+ * the event counts.
+ */
+static int h_24x7_event_commit_txn(struct pmu *pmu)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	struct hv_24x7_result *resb;
+	struct perf_event *event;
+	u64 count;
+	int i, ret, txn_flags;
+	struct hv_24x7_hw *h24x7hw;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+	WARN_ON_ONCE(!txn_flags);
+
+	ret = 0;
+	if (txn_flags & ~PERF_PMU_TXN_READ)
+		goto out;
+
+	ret = __this_cpu_read(hv_24x7_txn_err);
+	if (ret)
+		goto out;
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	ret = make_24x7_request(request_buffer, result_buffer);
+	if (ret) {
+		log_24x7_hcall(request_buffer, result_buffer, ret);
+		goto put_reqb;
+	}
+
+	h24x7hw = &get_cpu_var(hv_24x7_hw);
+
+	/* Update event counts from hcall */
+	for (i = 0; i < request_buffer->num_requests; i++) {
+		resb = &result_buffer->results[i];
+		count = be64_to_cpu(resb->elements[0].element_data[0]);
+		event = h24x7hw->events[i];
+		h24x7hw->events[i] = NULL;
+		update_event_count(event, count);
+	}
+
+	put_cpu_var(hv_24x7_hw);
+
+put_reqb:
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+out:
+	reset_txn();
+	return ret;
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and dont need/support ADD transactions. However, regardless of type
+ * of transaction, all we need to do is cleanup, so we don't have to check
+ * the type of transaction.
+ */
+static void h_24x7_event_cancel_txn(struct pmu *pmu)
+{
+	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
+	reset_txn();
+}
+
 static struct pmu h_24x7_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 
@@ -1266,6 +1425,9 @@ static struct pmu h_24x7_pmu = {
 	.start       = h_24x7_event_start,
 	.stop        = h_24x7_event_stop,
 	.read        = h_24x7_event_read,
+	.start_txn   = h_24x7_event_start_txn,
+	.commit_txn  = h_24x7_event_commit_txn,
+	.cancel_txn  = h_24x7_event_cancel_txn,
 };
 
 static int hv_24x7_init(void)
diff --git a/kernel/arch/powerpc/perf/power8-pmu.c b/kernel/arch/powerpc/perf/power8-pmu.c
index 396351db6..7d5e29525 100644
--- a/kernel/arch/powerpc/perf/power8-pmu.c
+++ b/kernel/arch/powerpc/perf/power8-pmu.c
@@ -676,6 +676,9 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
 		return -1;
 
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+		return -1;
+
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
 		pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
 		return pmu_bhrb_filter;
-- 
cgit