diff options
author | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 12:17:53 -0700 |
---|---|---|
committer | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 15:44:42 -0700 |
commit | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch) | |
tree | 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/drivers/infiniband/hw/ehca/ehca_irq.c | |
parent | 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff) |
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base.
It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and
the base is:
commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200
Prepare v4.1.3-rt3
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
We lose all the git history this way and it's not good. We
should apply another opnfv project repo in future.
Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/drivers/infiniband/hw/ehca/ehca_irq.c')
-rw-r--r-- | kernel/drivers/infiniband/hw/ehca/ehca_irq.c | 870 |
1 files changed, 870 insertions, 0 deletions
diff --git a/kernel/drivers/infiniband/hw/ehca/ehca_irq.c b/kernel/drivers/infiniband/hw/ehca/ehca_irq.c new file mode 100644 index 000000000..8615d7cf7 --- /dev/null +++ b/kernel/drivers/infiniband/hw/ehca/ehca_irq.c @@ -0,0 +1,870 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick <schickhj@de.ibm.com> + * Khadija Souissi <souissi@de.ibm.com> + * Hoang-Nam Nguyen <hnguyen@de.ibm.com> + * Joachim Fenkes <fenkes@de.ibm.com> + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/slab.h> +#include <linux/smpboot.h> + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool *pool; + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp *)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%llx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq *)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%llx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown error type: %llx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%llx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + /* rblock must be 4K aligned and should be 4K large */ + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) + ehca_err(&shca->ib_device, + "No error data is available: %llx.", resource); + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; + + print_error_data(shca, data, rblock, length); + } else + ehca_err(&shca->ib_device, + "Error data could not be fetched: %llx", resource); + + ehca_free_fw_ctrlblock(rblock); + +error_data1: + return ret; + +} + +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, + enum ib_event_type event_type) +{ + struct ib_event event; + + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; + qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ + struct ehca_qp *qp; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); + + if (!qp) + return; + + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); + + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); + break; + case 0x07: /* CQ error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) +{ + struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. + */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); + } else { + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + struct ehca_cq *cq; + + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (cq == NULL) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} + +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value, ret; + int eqe_cnt, i; + int eq_empty = 0; + + spin_lock(&eq->irq_spinlock); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (!eqe_cache[eqe_cnt].cq) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if (ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); + } + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); + +unlock_irq_spinlock: + spin_unlock(&eq->irq_spinlock); +} + +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} + +static int find_next_online_cpu(struct ehca_comp_pool *pool) +{ + int cpu; + unsigned long flags; + + WARN_ON_ONCE(!in_interrupt()); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags); + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); + + return cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) +{ + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up_process(thread); + } else + __cq->nr_callbacks++; + + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu_id; + struct ehca_cpu_comp_task *cct; + struct task_struct *thread; + int cq_jobs; + unsigned long flags; + + cpu_id = find_next_online_cpu(pool); + BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + } + __queue_comp_task(__cq, cct, thread); +} + +static void run_comp_task(struct ehca_cpu_comp_task *cct) +{ + struct ehca_cq *cq; + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irq(&cct->task_lock); + + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + spin_lock_irq(&cct->task_lock); + spin_lock(&cq->task_lock); + cq->nr_callbacks--; + if (!cq->nr_callbacks) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock(&cq->task_lock); + } +} + +static void comp_task_park(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); + + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); + } + spin_unlock_irq(&target->task_lock); +} + +static void comp_task_stop(unsigned int cpu, bool online) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); +} + +static int comp_task_should_run(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + return cct->cq_jobs; +} + +static void comp_task(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); + } + spin_unlock_irq(&cct->task_lock); +} + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; + +int ehca_create_comp_pool(void) +{ + int cpu, ret = -ENOMEM; + + if (!ehca_scaling_code) + return 0; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = cpumask_any(cpu_online_mask); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (!pool->cpu_comp_tasks) + goto out_pool; + + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + } + + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; + + pr_info("eHCA scaling code enabled\n"); + return ret; + +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; +} + +void ehca_destroy_comp_pool(void) +{ + if (!ehca_scaling_code) + return; + + smpboot_unregister_percpu_thread(&comp_pool_threads); + + free_percpu(pool->cpu_comp_threads); + free_percpu(pool->cpu_comp_tasks); + kfree(pool); +} |