Add the rt linux 4.1.3-rt3 as base

Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author: Yunhong Jiang <yunhong.jiang@intel.com> 2015-08-04 12:17:53 -0700
committer: Yunhong Jiang <yunhong.jiang@intel.com> 2015-08-04 15:44:42 -0700
commit: 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree: 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/drivers/infiniband/hw/ehca/ehca_irq.c
parent: 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)
1 files changed, 870 insertions, 0 deletions
diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_irq.c b/kernel/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 000000000..8615d7cf7
--- /dev/null
+++ b/kernel/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,870 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj@de.ibm.com>
+ *           Khadija Souissi <souissi@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
+#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
+#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool *pool;
+
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	if (!cq->ib_cq.comp_handler)
+		return;
+
+	spin_lock(&cq->cb_lock);
+	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+	spin_unlock(&cq->cb_lock);
+
+	return;
+}
+
+static void print_error_data(struct ehca_shca *shca, void *data,
+			     u64 *rblock, int length)
+{
+	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+	u64 resource = rblock[1];
+
+	switch (type) {
+	case 0x1: /* Queue Pair */
+	{
+		struct ehca_qp *qp = (struct ehca_qp *)data;
+
+		/* only print error data if AER is set */
+		if (rblock[6] == 0)
+			return;
+
+		ehca_err(&shca->ib_device,
+			 "QP 0x%x (resource=%llx) has errors.",
+			 qp->ib_qp.qp_num, resource);
+		break;
+	}
+	case 0x4: /* Completion Queue */
+	{
+		struct ehca_cq *cq = (struct ehca_cq *)data;
+
+		ehca_err(&shca->ib_device,
+			 "CQ 0x%x (resource=%llx) has errors.",
+			 cq->cq_number, resource);
+		break;
+	}
+	default:
+		ehca_err(&shca->ib_device,
+			 "Unknown error type: %llx on %s.",
+			 type, shca->ib_device.name);
+		break;
+	}
+
+	ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+		 "---------------------------------------------------");
+	ehca_dmp(rblock, length, "resource=%llx", resource);
+	ehca_err(&shca->ib_device, "EHCA ----- error data end "
+		 "----------------------------------------------------");
+
+	return;
+}
+
+int ehca_error_data(struct ehca_shca *shca, void *data,
+		    u64 resource)
+{
+
+	unsigned long ret;
+	u64 *rblock;
+	unsigned long block_count;
+
+	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+		ret = -ENOMEM;
+		goto error_data1;
+	}
+
+	/* rblock must be 4K aligned and should be 4K large */
+	ret = hipz_h_error_data(shca->ipz_hca_handle,
+				resource,
+				rblock,
+				&block_count);
+
+	if (ret == H_R_STATE)
+		ehca_err(&shca->ib_device,
+			 "No error data is available: %llx.", resource);
+	else if (ret == H_SUCCESS) {
+		int length;
+
+		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		if (length > EHCA_PAGESIZE)
+			length = EHCA_PAGESIZE;
+
+		print_error_data(shca, data, rblock, length);
+	} else
+		ehca_err(&shca->ib_device,
+			 "Error data could not be fetched: %llx", resource);
+
+	ehca_free_fw_ctrlblock(rblock);
+
+error_data1:
+	return ret;
+
+}
+
+static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
+			      enum ib_event_type event_type)
+{
+	struct ib_event event;
+
+	/* PATH_MIG without the QP ever having been armed is false alarm */
+	if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+		return;
+
+	event.device = &shca->ib_device;
+	event.event = event_type;
+
+	if (qp->ext_type == EQPT_SRQ) {
+		if (!qp->ib_srq.event_handler)
+			return;
+
+		event.element.srq = &qp->ib_srq;
+		qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
+	} else {
+		if (!qp->ib_qp.event_handler)
+			return;
+
+		event.element.qp = &qp->ib_qp;
+		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+	}
+}
+
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+			      enum ib_event_type event_type, int fatal)
+{
+	struct ehca_qp *qp;
+	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+
+	read_lock(&ehca_qp_idr_lock);
+	qp = idr_find(&ehca_qp_idr, token);
+	if (qp)
+		atomic_inc(&qp->nr_events);
+	read_unlock(&ehca_qp_idr_lock);
+
+	if (!qp)
+		return;
+
+	if (fatal)
+		ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+	dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
+			  IB_EVENT_SRQ_ERR : event_type);
+
+	/*
+	 * eHCA only processes one WQE at a time for SRQ base QPs,
+	 * so the last WQE has been processed as soon as the QP enters
+	 * error state.
+	 */
+	if (fatal && qp->ext_type == EQPT_SRQBASE)
+		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+
+	if (atomic_dec_and_test(&qp->nr_events))
+		wake_up(&qp->wait_completion);
+	return;
+}
+
+static void cq_event_callback(struct ehca_shca *shca,
+			      u64 eqe)
+{
+	struct ehca_cq *cq;
+	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+
+	read_lock(&ehca_cq_idr_lock);
+	cq = idr_find(&ehca_cq_idr, token);
+	if (cq)
+		atomic_inc(&cq->nr_events);
+	read_unlock(&ehca_cq_idr_lock);
+
+	if (!cq)
+		return;
+
+	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+
+	if (atomic_dec_and_test(&cq->nr_events))
+		wake_up(&cq->wait_completion);
+
+	return;
+}
+
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	switch (identifier) {
+	case 0x02: /* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
+		break;
+	case 0x03: /* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
+		break;
+	case 0x04: /* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
+		break;
+	case 0x05: /* QP error */
+	case 0x06: /* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
+		break;
+	case 0x07: /* CQ error */
+	case 0x08: /* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09: /* MRMWPTE error */
+		ehca_err(&shca->ib_device, "MRMWPTE error.");
+		break;
+	case 0x0A: /* port event */
+		ehca_err(&shca->ib_device, "Port event.");
+		break;
+	case 0x0B: /* MR access error */
+		ehca_err(&shca->ib_device, "MR access error.");
+		break;
+	case 0x0C: /* EQ error */
+		ehca_err(&shca->ib_device, "EQ error.");
+		break;
+	case 0x0D: /* P/Q_Key mismatch */
+		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+		break;
+	case 0x10: /* sampling complete */
+		ehca_err(&shca->ib_device, "Sampling complete.");
+		break;
+	case 0x11: /* unaffiliated access error */
+		ehca_err(&shca->ib_device, "Unaffiliated access error.");
+		break;
+	case 0x12: /* path migrating */
+		ehca_err(&shca->ib_device, "Path migrating.");
+		break;
+	case 0x13: /* interface trace stopped */
+		ehca_err(&shca->ib_device, "Interface trace stopped.");
+		break;
+	case 0x14: /* first error capture info available */
+		ehca_info(&shca->ib_device, "First error capture available");
+		break;
+	case 0x15: /* SRQ limit reached */
+		qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+				enum ib_event_type type, const char *msg)
+{
+	struct ib_event event;
+
+	ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+	event.device = &shca->ib_device;
+	event.event = type;
+	event.element.port_num = port_num;
+	ib_dispatch_event(&event);
+}
+
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+	struct ehca_sma_attr  new_attr;
+	struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+
+	ehca_query_sma_attr(shca, port_num, &new_attr);
+
+	if (new_attr.sm_sl  != old_attr->sm_sl ||
+	    new_attr.sm_lid != old_attr->sm_lid)
+		dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+				    "SM changed");
+
+	if (new_attr.lid != old_attr->lid ||
+	    new_attr.lmc != old_attr->lmc)
+		dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+				    "LID changed");
+
+	if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+	    memcmp(new_attr.pkeys, old_attr->pkeys,
+		   sizeof(u16) * new_attr.pkey_tbl_len))
+		dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+				    "P_Key changed");
+
+	*old_attr = new_attr;
+}
+
+/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
+static int replay_modify_qp(struct ehca_sport *sport)
+{
+	int aqp1_destroyed;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+
+	aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
+
+	if (sport->ibqp_sqp[IB_QPT_SMI])
+		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+	if (!aqp1_destroyed)
+		ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+
+	spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+	return aqp1_destroyed;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+	u8 spec_event;
+	struct ehca_sport *sport = &shca->sport[port - 1];
+
+	switch (ec) {
+	case 0x30: /* port availability change */
+		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+			/* only replay modify_qp calls in autodetect mode;
+			 * if AQP1 was destroyed, the port is already down
+			 * again and we can drop the event.
+			 */
+			if (ehca_nr_ports < 0)
+				if (replay_modify_qp(sport))
+					break;
+
+			sport->port_state = IB_PORT_ACTIVE;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+					    "is active");
+			ehca_query_sma_attr(shca, port, &sport->saved_attr);
+		} else {
+			sport->port_state = IB_PORT_DOWN;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+					    "is inactive");
+		}
+		break;
+	case 0x31:
+		/* port configuration change
+		 * disruptive change is caused by
+		 * LID, PKEY or SM change
+		 */
+		if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+			ehca_warn(&shca->ib_device, "disruptive port "
+				  "%d configuration change", port);
+
+			sport->port_state = IB_PORT_DOWN;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+					    "is inactive");
+
+			sport->port_state = IB_PORT_ACTIVE;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+					    "is active");
+			ehca_query_sma_attr(shca, port,
+					    &sport->saved_attr);
+		} else
+			notify_port_conf_change(shca, port);
+		break;
+	case 0x32: /* adapter malfunction */
+		ehca_err(&shca->ib_device, "Adapter malfunction.");
+		break;
+	case 0x33:  /* trace stopped */
+		ehca_err(&shca->ib_device, "Traced stopped.");
+		break;
+	case 0x34: /* util async event */
+		spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
+		if (spec_event == 0x80) /* client reregister required */
+			dispatch_port_event(shca, port,
+					    IB_EVENT_CLIENT_REREGISTER,
+					    "client reregister req.");
+		else
+			ehca_warn(&shca->ib_device, "Unknown util async "
+				  "event %x on port %x", spec_event, port);
+		break;
+	default:
+		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+
+	return;
+}
+
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	u64 CQx_EP;
+	struct h_galpa gal = cq->galpas.kernel;
+
+	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->neq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+void ehca_tasklet_neq(unsigned long data)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)data;
+	struct ehca_eqe *eqe;
+	u64 ret;
+
+	eqe = ehca_poll_eq(shca, &shca->neq);
+
+	while (eqe) {
+		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+			parse_ec(shca, eqe->entry);
+
+		eqe = ehca_poll_eq(shca, &shca->neq);
+	}
+
+	ret = hipz_h_reset_event(shca->ipz_hca_handle,
+				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+	if (ret != H_SUCCESS)
+		ehca_err(&shca->ib_device, "Can't clear notification events.");
+
+	return;
+}
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
+{
+	struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+	tasklet_hi_schedule(&shca->eq.interrupt_task);
+
+	return IRQ_HANDLED;
+}
+
+
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+	u64 eqe_value;
+	u32 token;
+	struct ehca_cq *cq;
+
+	eqe_value = eqe->entry;
+	ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
+	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+		ehca_dbg(&shca->ib_device, "Got completion event");
+		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+		read_lock(&ehca_cq_idr_lock);
+		cq = idr_find(&ehca_cq_idr, token);
+		if (cq)
+			atomic_inc(&cq->nr_events);
+		read_unlock(&ehca_cq_idr_lock);
+		if (cq == NULL) {
+			ehca_err(&shca->ib_device,
+				 "Invalid eqe for non-existing cq token=%x",
+				 token);
+			return;
+		}
+		reset_eq_pending(cq);
+		if (ehca_scaling_code)
+			queue_comp_task(cq);
+		else {
+			comp_event_callback(cq);
+			if (atomic_dec_and_test(&cq->nr_events))
+				wake_up(&cq->wait_completion);
+		}
+	} else {
+		ehca_dbg(&shca->ib_device, "Got non completion event");
+		parse_identifier(shca, eqe_value);
+	}
+}
+
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+	struct ehca_eq *eq = &shca->eq;
+	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+	u64 eqe_value, ret;
+	int eqe_cnt, i;
+	int eq_empty = 0;
+
+	spin_lock(&eq->irq_spinlock);
+	if (is_irq) {
+		const int max_query_cnt = 100;
+		int query_cnt = 0;
+		int int_state = 1;
+		do {
+			int_state = hipz_h_query_int_state(
+				shca->ipz_hca_handle, eq->ist);
+			query_cnt++;
+			iosync();
+		} while (int_state && query_cnt < max_query_cnt);
+		if (unlikely((query_cnt == max_query_cnt)))
+			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+				 int_state, query_cnt);
+	}
+
+	/* read out all eqes */
+	eqe_cnt = 0;
+	do {
+		u32 token;
+		eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
+		if (!eqe_cache[eqe_cnt].eqe)
+			break;
+		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+			read_lock(&ehca_cq_idr_lock);
+			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+			if (eqe_cache[eqe_cnt].cq)
+				atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+			read_unlock(&ehca_cq_idr_lock);
+			if (!eqe_cache[eqe_cnt].cq) {
+				ehca_err(&shca->ib_device,
+					 "Invalid eqe for non-existing cq "
+					 "token=%x", token);
+				continue;
+			}
+		} else
+			eqe_cache[eqe_cnt].cq = NULL;
+		eqe_cnt++;
+	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+	if (!eqe_cnt) {
+		if (is_irq)
+			ehca_dbg(&shca->ib_device,
+				 "No eqe found for irq event");
+		goto unlock_irq_spinlock;
+	} else if (!is_irq) {
+		ret = hipz_h_eoi(eq->ist);
+		if (ret != H_SUCCESS)
+			ehca_err(&shca->ib_device,
+				 "bad return code EOI -rc = %lld\n", ret);
+		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+	}
+	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+	/* enable irq for new packets */
+	for (i = 0; i < eqe_cnt; i++) {
+		if (eq->eqe_cache[i].cq)
+			reset_eq_pending(eq->eqe_cache[i].cq);
+	}
+	/* check eq */
+	spin_lock(&eq->spinlock);
+	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+	spin_unlock(&eq->spinlock);
+	/* call completion handler for cached eqes */
+	for (i = 0; i < eqe_cnt; i++)
+		if (eq->eqe_cache[i].cq) {
+			if (ehca_scaling_code)
+				queue_comp_task(eq->eqe_cache[i].cq);
+			else {
+				struct ehca_cq *cq = eq->eqe_cache[i].cq;
+				comp_event_callback(cq);
+				if (atomic_dec_and_test(&cq->nr_events))
+					wake_up(&cq->wait_completion);
+			}
+		} else {
+			ehca_dbg(&shca->ib_device, "Got non completion event");
+			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
+		}
+	/* poll eq if not empty */
+	if (eq_empty)
+		goto unlock_irq_spinlock;
+	do {
+		struct ehca_eqe *eqe;
+		eqe = ehca_poll_eq(shca, &shca->eq);
+		if (!eqe)
+			break;
+		process_eqe(shca, eqe);
+	} while (1);
+
+unlock_irq_spinlock:
+	spin_unlock(&eq->irq_spinlock);
+}
+
+void ehca_tasklet_eq(unsigned long data)
+{
+	ehca_process_eq((struct ehca_shca*)data, 1);
+}
+
+static int find_next_online_cpu(struct ehca_comp_pool *pool)
+{
+	int cpu;
+	unsigned long flags;
+
+	WARN_ON_ONCE(!in_interrupt());
+	if (ehca_debug_level >= 3)
+		ehca_dmp(cpu_online_mask, cpumask_size(), "");
+
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	do {
+		cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_first(cpu_online_mask);
+		pool->last_cpu = cpu;
+	} while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
+
+	return cpu;
+}
+
+static void __queue_comp_task(struct ehca_cq *__cq,
+			      struct ehca_cpu_comp_task *cct,
+			      struct task_struct *thread)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
+
+	if (__cq->nr_callbacks == 0) {
+		__cq->nr_callbacks++;
+		list_add_tail(&__cq->entry, &cct->cq_list);
+		cct->cq_jobs++;
+		wake_up_process(thread);
+	} else
+		__cq->nr_callbacks++;
+
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
+}
+
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+	int cpu_id;
+	struct ehca_cpu_comp_task *cct;
+	struct task_struct *thread;
+	int cq_jobs;
+	unsigned long flags;
+
+	cpu_id = find_next_online_cpu(pool);
+	BUG_ON(!cpu_online(cpu_id));
+
+	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+	BUG_ON(!cct || !thread);
+
+	spin_lock_irqsave(&cct->task_lock, flags);
+	cq_jobs = cct->cq_jobs;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
+	if (cq_jobs > 0) {
+		cpu_id = find_next_online_cpu(pool);
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+		BUG_ON(!cct || !thread);
+	}
+	__queue_comp_task(__cq, cct, thread);
+}
+
+static void run_comp_task(struct ehca_cpu_comp_task *cct)
+{
+	struct ehca_cq *cq;
+
+	while (!list_empty(&cct->cq_list)) {
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		spin_unlock_irq(&cct->task_lock);
+
+		comp_event_callback(cq);
+		if (atomic_dec_and_test(&cq->nr_events))
+			wake_up(&cq->wait_completion);
+
+		spin_lock_irq(&cct->task_lock);
+		spin_lock(&cq->task_lock);
+		cq->nr_callbacks--;
+		if (!cq->nr_callbacks) {
+			list_del_init(cct->cq_list.next);
+			cct->cq_jobs--;
+		}
+		spin_unlock(&cq->task_lock);
+	}
+}
+
+static void comp_task_park(unsigned int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	struct ehca_cpu_comp_task *target;
+	struct task_struct *thread;
+	struct ehca_cq *cq, *tmp;
+	LIST_HEAD(list);
+
+	spin_lock_irq(&cct->task_lock);
+	cct->cq_jobs = 0;
+	cct->active = 0;
+	list_splice_init(&cct->cq_list, &list);
+	spin_unlock_irq(&cct->task_lock);
+
+	cpu = find_next_online_cpu(pool);
+	target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
+	spin_lock_irq(&target->task_lock);
+	list_for_each_entry_safe(cq, tmp, &list, entry) {
+		list_del(&cq->entry);
+		__queue_comp_task(cq, target, thread);
+	}
+	spin_unlock_irq(&target->task_lock);
+}
+
+static void comp_task_stop(unsigned int cpu, bool online)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+	spin_lock_irq(&cct->task_lock);
+	cct->cq_jobs = 0;
+	cct->active = 0;
+	WARN_ON(!list_empty(&cct->cq_list));
+	spin_unlock_irq(&cct->task_lock);
+}
+
+static int comp_task_should_run(unsigned int cpu)
+{
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+	return cct->cq_jobs;
+}
+
+static void comp_task(unsigned int cpu)
+{
+	struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
+	int cql_empty;
+
+	spin_lock_irq(&cct->task_lock);
+	cql_empty = list_empty(&cct->cq_list);
+	if (!cql_empty) {
+		__set_current_state(TASK_RUNNING);
+		run_comp_task(cct);
+	}
+	spin_unlock_irq(&cct->task_lock);
+}
+
+static struct smp_hotplug_thread comp_pool_threads = {
+	.thread_should_run	= comp_task_should_run,
+	.thread_fn		= comp_task,
+	.thread_comm		= "ehca_comp/%u",
+	.cleanup		= comp_task_stop,
+	.park			= comp_task_park,
+};
+
+int ehca_create_comp_pool(void)
+{
+	int cpu, ret = -ENOMEM;
+
+	if (!ehca_scaling_code)
+		return 0;
+
+	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+	if (pool == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&pool->last_cpu_lock);
+	pool->last_cpu = cpumask_any(cpu_online_mask);
+
+	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+	if (!pool->cpu_comp_tasks)
+		goto out_pool;
+
+	pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
+	if (!pool->cpu_comp_threads)
+		goto out_tasks;
+
+	for_each_present_cpu(cpu) {
+		struct ehca_cpu_comp_task *cct;
+
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		spin_lock_init(&cct->task_lock);
+		INIT_LIST_HEAD(&cct->cq_list);
+	}
+
+	comp_pool_threads.store = pool->cpu_comp_threads;
+	ret = smpboot_register_percpu_thread(&comp_pool_threads);
+	if (ret)
+		goto out_threads;
+
+	pr_info("eHCA scaling code enabled\n");
+	return ret;
+
+out_threads:
+	free_percpu(pool->cpu_comp_threads);
+out_tasks:
+	free_percpu(pool->cpu_comp_tasks);
+out_pool:
+	kfree(pool);
+	return ret;
+}
+
+void ehca_destroy_comp_pool(void)
+{
+	if (!ehca_scaling_code)
+		return;
+
+	smpboot_unregister_percpu_thread(&comp_pool_threads);
+
+	free_percpu(pool->cpu_comp_threads);
+	free_percpu(pool->cpu_comp_tasks);
+	kfree(pool);
+}
author	Yunhong Jiang <yunhong.jiang@intel.com>	2015-08-04 12:17:53 -0700
committer	Yunhong Jiang <yunhong.jiang@intel.com>	2015-08-04 15:44:42 -0700
commit	9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch)
tree	1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/drivers/infiniband/hw/ehca/ehca_irq.c
parent	98260f3884f4a202f9ca5eabed40b1354c489b29 (diff)