diff options
author | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-11 10:41:07 +0300 |
---|---|---|
committer | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-13 08:17:18 +0300 |
commit | e09b41010ba33a20a87472ee821fa407a5b8da36 (patch) | |
tree | d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/misc | |
parent | f93b97fd65072de626c074dbe099a1fff05ce060 (diff) |
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page.
During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic(I70131fb85).
Collisions have been removed because its logic was found on the
source already.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/misc')
151 files changed, 21335 insertions, 6610 deletions
diff --git a/kernel/drivers/misc/Kconfig b/kernel/drivers/misc/Kconfig index 453a61660..3540f684f 100644 --- a/kernel/drivers/misc/Kconfig +++ b/kernel/drivers/misc/Kconfig @@ -309,6 +309,16 @@ config HP_ILO To compile this driver as a module, choose M here: the module will be called hpilo. +config QCOM_COINCELL + tristate "Qualcomm coincell charger support" + depends on MFD_SPMI_PMIC || COMPILE_TEST + help + This driver supports the coincell block found inside of + Qualcomm PMICs. The coincell charger provides a means to + charge a coincell battery or backup capacitor which is used + to maintain PMIC register and RTC state in the absence of + external power. + config SGI_GRU tristate "SGI GRU driver" depends on X86_UV && SMP @@ -442,7 +452,7 @@ config TI_DAC7512 config VMWARE_BALLOON tristate "VMware Balloon Driver" - depends on X86 && HYPERVISOR_GUEST + depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST help This is VMware physical memory management driver which acts like a "balloon" that can be inflated to reclaim physical pages @@ -558,7 +568,6 @@ source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" source "drivers/misc/ti-st/Kconfig" source "drivers/misc/lis3lv02d/Kconfig" -source "drivers/misc/carma/Kconfig" source "drivers/misc/altera-stapl/Kconfig" source "drivers/misc/mei/Kconfig" source "drivers/misc/vmw_vmci/Kconfig" diff --git a/kernel/drivers/misc/Makefile b/kernel/drivers/misc/Makefile index 6a8e39388..ec4aecba0 100644 --- a/kernel/drivers/misc/Makefile +++ b/kernel/drivers/misc/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_LKDTM) += lkdtm.o obj-$(CONFIG_TIFM_CORE) += tifm_core.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o obj-$(CONFIG_PHANTOM) += phantom.o +obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o obj-$(CONFIG_SENSORS_BH1780) += bh1780gli.o obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o @@ -45,7 +46,6 @@ obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o obj-$(CONFIG_PCH_PHUB) += pch_phub.o obj-y += ti-st/ obj-y += lis3lv02d/ -obj-y += carma/ obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ obj-$(CONFIG_INTEL_MEI) += mei/ diff --git a/kernel/drivers/misc/ad525x_dpot-i2c.c b/kernel/drivers/misc/ad525x_dpot-i2c.c index 705b881e1..4f832002d 100644 --- a/kernel/drivers/misc/ad525x_dpot-i2c.c +++ b/kernel/drivers/misc/ad525x_dpot-i2c.c @@ -106,7 +106,6 @@ MODULE_DEVICE_TABLE(i2c, ad_dpot_id); static struct i2c_driver ad_dpot_i2c_driver = { .driver = { .name = "ad_dpot", - .owner = THIS_MODULE, }, .probe = ad_dpot_i2c_probe, .remove = ad_dpot_i2c_remove, @@ -118,4 +117,3 @@ module_i2c_driver(ad_dpot_i2c_driver); MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); MODULE_DESCRIPTION("digital potentiometer I2C bus driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("i2c:ad_dpot"); diff --git a/kernel/drivers/misc/ad525x_dpot-spi.c b/kernel/drivers/misc/ad525x_dpot-spi.c index f4c82eafa..39a7f517e 100644 --- a/kernel/drivers/misc/ad525x_dpot-spi.c +++ b/kernel/drivers/misc/ad525x_dpot-spi.c @@ -132,7 +132,6 @@ MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id); static struct spi_driver ad_dpot_spi_driver = { .driver = { .name = "ad_dpot", - .owner = THIS_MODULE, }, .probe = ad_dpot_spi_probe, .remove = ad_dpot_spi_remove, diff --git a/kernel/drivers/misc/altera-stapl/altera.c b/kernel/drivers/misc/altera-stapl/altera.c index bca2630d0..f53e217e9 100644 --- a/kernel/drivers/misc/altera-stapl/altera.c +++ b/kernel/drivers/misc/altera-stapl/altera.c @@ -2451,7 +2451,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) astate->config = config; if (!astate->config->jtag_io) { - dprintk(KERN_INFO "%s: using byteblaster!\n", __func__); + dprintk("%s: using byteblaster!\n", __func__); astate->config->jtag_io = netup_jtag_io_lpt; } diff --git a/kernel/drivers/misc/apds990x.c b/kernel/drivers/misc/apds990x.c index 3739ffa9c..a3e789b85 100644 --- a/kernel/drivers/misc/apds990x.c +++ b/kernel/drivers/misc/apds990x.c @@ -1275,7 +1275,6 @@ static const struct dev_pm_ops apds990x_pm_ops = { static struct i2c_driver apds990x_driver = { .driver = { .name = "apds990x", - .owner = THIS_MODULE, .pm = &apds990x_pm_ops, }, .probe = apds990x_probe, diff --git a/kernel/drivers/misc/atmel_tclib.c b/kernel/drivers/misc/atmel_tclib.c index 0ca05c3ec..ac24a4bd6 100644 --- a/kernel/drivers/misc/atmel_tclib.c +++ b/kernel/drivers/misc/atmel_tclib.c @@ -125,6 +125,10 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); + tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk"); + if (IS_ERR(tc->slow_clk)) + return PTR_ERR(tc->slow_clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); tc->regs = devm_ioremap_resource(&pdev->dev, r); if (IS_ERR(tc->regs)) diff --git a/kernel/drivers/misc/bh1770glc.c b/kernel/drivers/misc/bh1770glc.c index b756381b8..753d7ecda 100644 --- a/kernel/drivers/misc/bh1770glc.c +++ b/kernel/drivers/misc/bh1770glc.c @@ -1396,7 +1396,6 @@ static const struct dev_pm_ops bh1770_pm_ops = { static struct i2c_driver bh1770_driver = { .driver = { .name = "bh1770glc", - .owner = THIS_MODULE, .pm = &bh1770_pm_ops, }, .probe = bh1770_probe, diff --git a/kernel/drivers/misc/bmp085-i2c.c b/kernel/drivers/misc/bmp085-i2c.c index a7c16295b..f35c218aa 100644 --- a/kernel/drivers/misc/bmp085-i2c.c +++ b/kernel/drivers/misc/bmp085-i2c.c @@ -66,7 +66,6 @@ MODULE_DEVICE_TABLE(i2c, bmp085_id); static struct i2c_driver bmp085_i2c_driver = { .driver = { - .owner = THIS_MODULE, .name = BMP085_NAME, }, .id_table = bmp085_id, diff --git a/kernel/drivers/misc/bmp085-spi.c b/kernel/drivers/misc/bmp085-spi.c index 864ecac32..17ecbf95f 100644 --- a/kernel/drivers/misc/bmp085-spi.c +++ b/kernel/drivers/misc/bmp085-spi.c @@ -64,7 +64,6 @@ MODULE_DEVICE_TABLE(spi, bmp085_id); static struct spi_driver bmp085_spi_driver = { .driver = { - .owner = THIS_MODULE, .name = BMP085_NAME, .of_match_table = bmp085_of_match }, diff --git a/kernel/drivers/misc/c2port/core.c b/kernel/drivers/misc/c2port/core.c index 464419b36..cc8645b53 100644 --- a/kernel/drivers/misc/c2port/core.c +++ b/kernel/drivers/misc/c2port/core.c @@ -926,7 +926,7 @@ struct c2port_device *c2port_device_register(char *name, c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, "c2port%d", c2dev->id); - if (unlikely(IS_ERR(c2dev->dev))) { + if (IS_ERR(c2dev->dev)) { ret = PTR_ERR(c2dev->dev); goto error_device_create; } diff --git a/kernel/drivers/misc/carma/Kconfig b/kernel/drivers/misc/carma/Kconfig deleted file mode 100644 index 295882bfb..000000000 --- a/kernel/drivers/misc/carma/Kconfig +++ /dev/null @@ -1,15 +0,0 @@ -config CARMA_FPGA - tristate "CARMA DATA-FPGA Access Driver" - depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA - default n - help - Say Y here to include support for communicating with the data - processing FPGAs on the OVRO CARMA board. - -config CARMA_FPGA_PROGRAM - tristate "CARMA DATA-FPGA Programmer" - depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA - default n - help - Say Y here to include support for programming the data processing - FPGAs on the OVRO CARMA board. diff --git a/kernel/drivers/misc/carma/Makefile b/kernel/drivers/misc/carma/Makefile deleted file mode 100644 index ff36ac2ce..000000000 --- a/kernel/drivers/misc/carma/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-$(CONFIG_CARMA_FPGA) += carma-fpga.o -obj-$(CONFIG_CARMA_FPGA_PROGRAM) += carma-fpga-program.o diff --git a/kernel/drivers/misc/carma/carma-fpga-program.c b/kernel/drivers/misc/carma/carma-fpga-program.c deleted file mode 100644 index 0b1bd85e4..000000000 --- a/kernel/drivers/misc/carma/carma-fpga-program.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - * CARMA Board DATA-FPGA Programmer - * - * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/dma-mapping.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/of_platform.h> -#include <linux/completion.h> -#include <linux/miscdevice.h> -#include <linux/dmaengine.h> -#include <linux/fsldma.h> -#include <linux/interrupt.h> -#include <linux/highmem.h> -#include <linux/vmalloc.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/leds.h> -#include <linux/slab.h> -#include <linux/kref.h> -#include <linux/fs.h> -#include <linux/io.h> - -/* MPC8349EMDS specific get_immrbase() */ -#include <sysdev/fsl_soc.h> - -static const char drv_name[] = "carma-fpga-program"; - -/* - * Firmware images are always this exact size - * - * 12849552 bytes for a CARMA Digitizer Board (EP2S90 FPGAs) - * 18662880 bytes for a CARMA Correlator Board (EP2S130 FPGAs) - */ -#define FW_SIZE_EP2S90 12849552 -#define FW_SIZE_EP2S130 18662880 - -struct fpga_dev { - struct miscdevice miscdev; - - /* Reference count */ - struct kref ref; - - /* Device Registers */ - struct device *dev; - void __iomem *regs; - void __iomem *immr; - - /* Freescale DMA Device */ - struct dma_chan *chan; - - /* Interrupts */ - int irq, status; - struct completion completion; - - /* FPGA Bitfile */ - struct mutex lock; - - void *vaddr; - struct scatterlist *sglist; - int sglen; - int nr_pages; - bool buf_allocated; - - /* max size and written bytes */ - size_t fw_size; - size_t bytes; -}; - -static int fpga_dma_init(struct fpga_dev *priv, int nr_pages) -{ - struct page *pg; - int i; - - priv->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT); - if (NULL == priv->vaddr) { - pr_debug("vmalloc_32(%d pages) failed\n", nr_pages); - return -ENOMEM; - } - - pr_debug("vmalloc is at addr 0x%08lx, size=%d\n", - (unsigned long)priv->vaddr, - nr_pages << PAGE_SHIFT); - - memset(priv->vaddr, 0, nr_pages << PAGE_SHIFT); - priv->nr_pages = nr_pages; - - priv->sglist = vzalloc(priv->nr_pages * sizeof(*priv->sglist)); - if (NULL == priv->sglist) - goto vzalloc_err; - - sg_init_table(priv->sglist, priv->nr_pages); - for (i = 0; i < priv->nr_pages; i++) { - pg = vmalloc_to_page(priv->vaddr + i * PAGE_SIZE); - if (NULL == pg) - goto vmalloc_to_page_err; - sg_set_page(&priv->sglist[i], pg, PAGE_SIZE, 0); - } - return 0; - -vmalloc_to_page_err: - vfree(priv->sglist); - priv->sglist = NULL; -vzalloc_err: - vfree(priv->vaddr); - priv->vaddr = NULL; - return -ENOMEM; -} - -static int fpga_dma_map(struct fpga_dev *priv) -{ - priv->sglen = dma_map_sg(priv->dev, priv->sglist, - priv->nr_pages, DMA_TO_DEVICE); - - if (0 == priv->sglen) { - pr_warn("%s: dma_map_sg failed\n", __func__); - return -ENOMEM; - } - return 0; -} - -static int fpga_dma_unmap(struct fpga_dev *priv) -{ - if (!priv->sglen) - return 0; - - dma_unmap_sg(priv->dev, priv->sglist, priv->sglen, DMA_TO_DEVICE); - priv->sglen = 0; - return 0; -} - -/* - * FPGA Bitfile Helpers - */ - -/** - * fpga_drop_firmware_data() - drop the bitfile image from memory - * @priv: the driver's private data structure - * - * LOCKING: must hold priv->lock - */ -static void fpga_drop_firmware_data(struct fpga_dev *priv) -{ - vfree(priv->sglist); - vfree(priv->vaddr); - priv->buf_allocated = false; - priv->bytes = 0; -} - -/* - * Private Data Reference Count - */ - -static void fpga_dev_remove(struct kref *ref) -{ - struct fpga_dev *priv = container_of(ref, struct fpga_dev, ref); - - /* free any firmware image that was not programmed */ - fpga_drop_firmware_data(priv); - - mutex_destroy(&priv->lock); - kfree(priv); -} - -/* - * LED Trigger (could be a seperate module) - */ - -/* - * NOTE: this whole thing does have the problem that whenever the led's are - * NOTE: first set to use the fpga trigger, they could be in the wrong state - */ - -DEFINE_LED_TRIGGER(ledtrig_fpga); - -static void ledtrig_fpga_programmed(bool enabled) -{ - if (enabled) - led_trigger_event(ledtrig_fpga, LED_FULL); - else - led_trigger_event(ledtrig_fpga, LED_OFF); -} - -/* - * FPGA Register Helpers - */ - -/* Register Definitions */ -#define FPGA_CONFIG_CONTROL 0x40 -#define FPGA_CONFIG_STATUS 0x44 -#define FPGA_CONFIG_FIFO_SIZE 0x48 -#define FPGA_CONFIG_FIFO_USED 0x4C -#define FPGA_CONFIG_TOTAL_BYTE_COUNT 0x50 -#define FPGA_CONFIG_CUR_BYTE_COUNT 0x54 - -#define FPGA_FIFO_ADDRESS 0x3000 - -static int fpga_fifo_size(void __iomem *regs) -{ - return ioread32be(regs + FPGA_CONFIG_FIFO_SIZE); -} - -#define CFG_STATUS_ERR_MASK 0xfffe - -static int fpga_config_error(void __iomem *regs) -{ - return ioread32be(regs + FPGA_CONFIG_STATUS) & CFG_STATUS_ERR_MASK; -} - -static int fpga_fifo_empty(void __iomem *regs) -{ - return ioread32be(regs + FPGA_CONFIG_FIFO_USED) == 0; -} - -static void fpga_fifo_write(void __iomem *regs, u32 val) -{ - iowrite32be(val, regs + FPGA_FIFO_ADDRESS); -} - -static void fpga_set_byte_count(void __iomem *regs, u32 count) -{ - iowrite32be(count, regs + FPGA_CONFIG_TOTAL_BYTE_COUNT); -} - -#define CFG_CTL_ENABLE (1 << 0) -#define CFG_CTL_RESET (1 << 1) -#define CFG_CTL_DMA (1 << 2) - -static void fpga_programmer_enable(struct fpga_dev *priv, bool dma) -{ - u32 val; - - val = (dma) ? (CFG_CTL_ENABLE | CFG_CTL_DMA) : CFG_CTL_ENABLE; - iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL); -} - -static void fpga_programmer_disable(struct fpga_dev *priv) -{ - iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL); -} - -static void fpga_dump_registers(struct fpga_dev *priv) -{ - u32 control, status, size, used, total, curr; - - /* good status: do nothing */ - if (priv->status == 0) - return; - - /* Dump all status registers */ - control = ioread32be(priv->regs + FPGA_CONFIG_CONTROL); - status = ioread32be(priv->regs + FPGA_CONFIG_STATUS); - size = ioread32be(priv->regs + FPGA_CONFIG_FIFO_SIZE); - used = ioread32be(priv->regs + FPGA_CONFIG_FIFO_USED); - total = ioread32be(priv->regs + FPGA_CONFIG_TOTAL_BYTE_COUNT); - curr = ioread32be(priv->regs + FPGA_CONFIG_CUR_BYTE_COUNT); - - dev_err(priv->dev, "Configuration failed, dumping status registers\n"); - dev_err(priv->dev, "Control: 0x%.8x\n", control); - dev_err(priv->dev, "Status: 0x%.8x\n", status); - dev_err(priv->dev, "FIFO Size: 0x%.8x\n", size); - dev_err(priv->dev, "FIFO Used: 0x%.8x\n", used); - dev_err(priv->dev, "FIFO Total: 0x%.8x\n", total); - dev_err(priv->dev, "FIFO Curr: 0x%.8x\n", curr); -} - -/* - * FPGA Power Supply Code - */ - -#define CTL_PWR_CONTROL 0x2006 -#define CTL_PWR_STATUS 0x200A -#define CTL_PWR_FAIL 0x200B - -#define PWR_CONTROL_ENABLE 0x01 - -#define PWR_STATUS_ERROR_MASK 0x10 -#define PWR_STATUS_GOOD 0x0f - -/* - * Determine if the FPGA power is good for all supplies - */ -static bool fpga_power_good(struct fpga_dev *priv) -{ - u8 val; - - val = ioread8(priv->regs + CTL_PWR_STATUS); - if (val & PWR_STATUS_ERROR_MASK) - return false; - - return val == PWR_STATUS_GOOD; -} - -/* - * Disable the FPGA power supplies - */ -static void fpga_disable_power_supplies(struct fpga_dev *priv) -{ - unsigned long start; - u8 val; - - iowrite8(0x0, priv->regs + CTL_PWR_CONTROL); - - /* - * Wait 500ms for the power rails to discharge - * - * Without this delay, the CTL-CPLD state machine can get into a - * state where it is waiting for the power-goods to assert, but they - * never do. This only happens when enabling and disabling the - * power sequencer very rapidly. - * - * The loop below will also wait for the power goods to de-assert, - * but testing has shown that they are always disabled by the time - * the sleep completes. However, omitting the sleep and only waiting - * for the power-goods to de-assert was not sufficient to ensure - * that the power sequencer would not wedge itself. - */ - msleep(500); - - start = jiffies; - while (time_before(jiffies, start + HZ)) { - val = ioread8(priv->regs + CTL_PWR_STATUS); - if (!(val & PWR_STATUS_GOOD)) - break; - - usleep_range(5000, 10000); - } - - val = ioread8(priv->regs + CTL_PWR_STATUS); - if (val & PWR_STATUS_GOOD) { - dev_err(priv->dev, "power disable failed: " - "power goods: status 0x%.2x\n", val); - } - - if (val & PWR_STATUS_ERROR_MASK) { - dev_err(priv->dev, "power disable failed: " - "alarm bit set: status 0x%.2x\n", val); - } -} - -/** - * fpga_enable_power_supplies() - enable the DATA-FPGA power supplies - * @priv: the driver's private data structure - * - * Enable the DATA-FPGA power supplies, waiting up to 1 second for - * them to enable successfully. - * - * Returns 0 on success, -ERRNO otherwise - */ -static int fpga_enable_power_supplies(struct fpga_dev *priv) -{ - unsigned long start = jiffies; - - if (fpga_power_good(priv)) { - dev_dbg(priv->dev, "power was already good\n"); - return 0; - } - - iowrite8(PWR_CONTROL_ENABLE, priv->regs + CTL_PWR_CONTROL); - while (time_before(jiffies, start + HZ)) { - if (fpga_power_good(priv)) - return 0; - - usleep_range(5000, 10000); - } - - return fpga_power_good(priv) ? 0 : -ETIMEDOUT; -} - -/* - * Determine if the FPGA power supplies are all enabled - */ -static bool fpga_power_enabled(struct fpga_dev *priv) -{ - u8 val; - - val = ioread8(priv->regs + CTL_PWR_CONTROL); - if (val & PWR_CONTROL_ENABLE) - return true; - - return false; -} - -/* - * Determine if the FPGA's are programmed and running correctly - */ -static bool fpga_running(struct fpga_dev *priv) -{ - if (!fpga_power_good(priv)) - return false; - - /* Check the config done bit */ - return ioread32be(priv->regs + FPGA_CONFIG_STATUS) & (1 << 18); -} - -/* - * FPGA Programming Code - */ - -/** - * fpga_program_block() - put a block of data into the programmer's FIFO - * @priv: the driver's private data structure - * @buf: the data to program - * @count: the length of data to program (must be a multiple of 4 bytes) - * - * Returns 0 on success, -ERRNO otherwise - */ -static int fpga_program_block(struct fpga_dev *priv, void *buf, size_t count) -{ - u32 *data = buf; - int size = fpga_fifo_size(priv->regs); - int i, len; - unsigned long timeout; - - /* enforce correct data length for the FIFO */ - BUG_ON(count % 4 != 0); - - while (count > 0) { - - /* Get the size of the block to write (maximum is FIFO_SIZE) */ - len = min_t(size_t, count, size); - timeout = jiffies + HZ / 4; - - /* Write the block */ - for (i = 0; i < len / 4; i++) - fpga_fifo_write(priv->regs, data[i]); - - /* Update the amounts left */ - count -= len; - data += len / 4; - - /* Wait for the fifo to empty */ - while (true) { - - if (fpga_fifo_empty(priv->regs)) { - break; - } else { - dev_dbg(priv->dev, "Fifo not empty\n"); - cpu_relax(); - } - - if (fpga_config_error(priv->regs)) { - dev_err(priv->dev, "Error detected\n"); - return -EIO; - } - - if (time_after(jiffies, timeout)) { - dev_err(priv->dev, "Fifo drain timeout\n"); - return -ETIMEDOUT; - } - - usleep_range(5000, 10000); - } - } - - return 0; -} - -/** - * fpga_program_cpu() - program the DATA-FPGA's using the CPU - * @priv: the driver's private data structure - * - * This is useful when the DMA programming method fails. It is possible to - * wedge the Freescale DMA controller such that the DMA programming method - * always fails. This method has always succeeded. - * - * Returns 0 on success, -ERRNO otherwise - */ -static noinline int fpga_program_cpu(struct fpga_dev *priv) -{ - int ret; - unsigned long timeout; - - /* Disable the programmer */ - fpga_programmer_disable(priv); - - /* Set the total byte count */ - fpga_set_byte_count(priv->regs, priv->bytes); - dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes); - - /* Enable the controller for programming */ - fpga_programmer_enable(priv, false); - dev_dbg(priv->dev, "enabled the controller\n"); - - /* Write each chunk of the FPGA bitfile to FPGA programmer */ - ret = fpga_program_block(priv, priv->vaddr, priv->bytes); - if (ret) - goto out_disable_controller; - - /* Wait for the interrupt handler to signal that programming finished */ - timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ); - if (!timeout) { - dev_err(priv->dev, "Timed out waiting for completion\n"); - ret = -ETIMEDOUT; - goto out_disable_controller; - } - - /* Retrieve the status from the interrupt handler */ - ret = priv->status; - -out_disable_controller: - fpga_programmer_disable(priv); - return ret; -} - -#define FIFO_DMA_ADDRESS 0xf0003000 -#define FIFO_MAX_LEN 4096 - -/** - * fpga_program_dma() - program the DATA-FPGA's using the DMA engine - * @priv: the driver's private data structure - * - * Program the DATA-FPGA's using the Freescale DMA engine. This requires that - * the engine is programmed such that the hardware DMA request lines can - * control the entire DMA transaction. The system controller FPGA then - * completely offloads the programming from the CPU. - * - * Returns 0 on success, -ERRNO otherwise - */ -static noinline int fpga_program_dma(struct fpga_dev *priv) -{ - struct dma_chan *chan = priv->chan; - struct dma_async_tx_descriptor *tx; - size_t num_pages, len, avail = 0; - struct dma_slave_config config; - struct scatterlist *sg; - struct sg_table table; - dma_cookie_t cookie; - int ret, i; - unsigned long timeout; - - /* Disable the programmer */ - fpga_programmer_disable(priv); - - /* Allocate a scatterlist for the DMA destination */ - num_pages = DIV_ROUND_UP(priv->bytes, FIFO_MAX_LEN); - ret = sg_alloc_table(&table, num_pages, GFP_KERNEL); - if (ret) { - dev_err(priv->dev, "Unable to allocate dst scatterlist\n"); - ret = -ENOMEM; - goto out_return; - } - - /* - * This is an ugly hack - * - * We fill in a scatterlist as if it were mapped for DMA. This is - * necessary because there exists no better structure for this - * inside the kernel code. - * - * As an added bonus, we can use the DMAEngine API for all of this, - * rather than inventing another extremely similar API. - */ - avail = priv->bytes; - for_each_sg(table.sgl, sg, num_pages, i) { - len = min_t(size_t, avail, FIFO_MAX_LEN); - sg_dma_address(sg) = FIFO_DMA_ADDRESS; - sg_dma_len(sg) = len; - - avail -= len; - } - - /* Map the buffer for DMA */ - ret = fpga_dma_map(priv); - if (ret) { - dev_err(priv->dev, "Unable to map buffer for DMA\n"); - goto out_free_table; - } - - /* - * Configure the DMA channel to transfer FIFO_SIZE / 2 bytes per - * transaction, and then put it under external control - */ - memset(&config, 0, sizeof(config)); - config.direction = DMA_MEM_TO_DEV; - config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - config.dst_maxburst = fpga_fifo_size(priv->regs) / 2 / 4; - ret = dmaengine_slave_config(chan, &config); - if (ret) { - dev_err(priv->dev, "DMA slave configuration failed\n"); - goto out_dma_unmap; - } - - ret = fsl_dma_external_start(chan, 1); - if (ret) { - dev_err(priv->dev, "DMA external control setup failed\n"); - goto out_dma_unmap; - } - - /* setup and submit the DMA transaction */ - - tx = dmaengine_prep_dma_sg(chan, table.sgl, num_pages, - priv->sglist, priv->sglen, 0); - if (!tx) { - dev_err(priv->dev, "Unable to prep DMA transaction\n"); - ret = -ENOMEM; - goto out_dma_unmap; - } - - cookie = tx->tx_submit(tx); - if (dma_submit_error(cookie)) { - dev_err(priv->dev, "Unable to submit DMA transaction\n"); - ret = -ENOMEM; - goto out_dma_unmap; - } - - dma_async_issue_pending(chan); - - /* Set the total byte count */ - fpga_set_byte_count(priv->regs, priv->bytes); - dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes); - - /* Enable the controller for DMA programming */ - fpga_programmer_enable(priv, true); - dev_dbg(priv->dev, "enabled the controller\n"); - - /* Wait for the interrupt handler to signal that programming finished */ - timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ); - if (!timeout) { - dev_err(priv->dev, "Timed out waiting for completion\n"); - ret = -ETIMEDOUT; - goto out_disable_controller; - } - - /* Retrieve the status from the interrupt handler */ - ret = priv->status; - -out_disable_controller: - fpga_programmer_disable(priv); -out_dma_unmap: - fpga_dma_unmap(priv); -out_free_table: - sg_free_table(&table); -out_return: - return ret; -} - -/* - * Interrupt Handling - */ - -static irqreturn_t fpga_irq(int irq, void *dev_id) -{ - struct fpga_dev *priv = dev_id; - - /* Save the status */ - priv->status = fpga_config_error(priv->regs) ? -EIO : 0; - dev_dbg(priv->dev, "INTERRUPT status %d\n", priv->status); - fpga_dump_registers(priv); - - /* Disabling the programmer clears the interrupt */ - fpga_programmer_disable(priv); - - /* Notify any waiters */ - complete(&priv->completion); - - return IRQ_HANDLED; -} - -/* - * SYSFS Helpers - */ - -/** - * fpga_do_stop() - deconfigure (reset) the DATA-FPGA's - * @priv: the driver's private data structure - * - * LOCKING: must hold priv->lock - */ -static int fpga_do_stop(struct fpga_dev *priv) -{ - u32 val; - - /* Set the led to unprogrammed */ - ledtrig_fpga_programmed(false); - - /* Pulse the config line to reset the FPGA's */ - val = CFG_CTL_ENABLE | CFG_CTL_RESET; - iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL); - iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL); - - return 0; -} - -static noinline int fpga_do_program(struct fpga_dev *priv) -{ - int ret; - - if (priv->bytes != priv->fw_size) { - dev_err(priv->dev, "Incorrect bitfile size: got %zu bytes, " - "should be %zu bytes\n", - priv->bytes, priv->fw_size); - return -EINVAL; - } - - if (!fpga_power_enabled(priv)) { - dev_err(priv->dev, "Power not enabled\n"); - return -EINVAL; - } - - if (!fpga_power_good(priv)) { - dev_err(priv->dev, "Power not good\n"); - return -EINVAL; - } - - /* Set the LED to unprogrammed */ - ledtrig_fpga_programmed(false); - - /* Try to program the FPGA's using DMA */ - ret = fpga_program_dma(priv); - - /* If DMA failed or doesn't exist, try with CPU */ - if (ret) { - dev_warn(priv->dev, "Falling back to CPU programming\n"); - ret = fpga_program_cpu(priv); - } - - if (ret) { - dev_err(priv->dev, "Unable to program FPGA's\n"); - return ret; - } - - /* Drop the firmware bitfile from memory */ - fpga_drop_firmware_data(priv); - - dev_dbg(priv->dev, "FPGA programming successful\n"); - ledtrig_fpga_programmed(true); - - return 0; -} - -/* - * File Operations - */ - -static int fpga_open(struct inode *inode, struct file *filp) -{ - /* - * The miscdevice layer puts our struct miscdevice into the - * filp->private_data field. We use this to find our private - * data and then overwrite it with our own private structure. - */ - struct fpga_dev *priv = container_of(filp->private_data, - struct fpga_dev, miscdev); - unsigned int nr_pages; - int ret; - - /* We only allow one process at a time */ - ret = mutex_lock_interruptible(&priv->lock); - if (ret) - return ret; - - filp->private_data = priv; - kref_get(&priv->ref); - - /* Truncation: drop any existing data */ - if (filp->f_flags & O_TRUNC) - priv->bytes = 0; - - /* Check if we have already allocated a buffer */ - if (priv->buf_allocated) - return 0; - - /* Allocate a buffer to hold enough data for the bitfile */ - nr_pages = DIV_ROUND_UP(priv->fw_size, PAGE_SIZE); - ret = fpga_dma_init(priv, nr_pages); - if (ret) { - dev_err(priv->dev, "unable to allocate data buffer\n"); - mutex_unlock(&priv->lock); - kref_put(&priv->ref, fpga_dev_remove); - return ret; - } - - priv->buf_allocated = true; - return 0; -} - -static int fpga_release(struct inode *inode, struct file *filp) -{ - struct fpga_dev *priv = filp->private_data; - - mutex_unlock(&priv->lock); - kref_put(&priv->ref, fpga_dev_remove); - return 0; -} - -static ssize_t fpga_write(struct file *filp, const char __user *buf, - size_t count, loff_t *f_pos) -{ - struct fpga_dev *priv = filp->private_data; - - /* FPGA bitfiles have an exact size: disallow anything else */ - if (priv->bytes >= priv->fw_size) - return -ENOSPC; - - count = min_t(size_t, priv->fw_size - priv->bytes, count); - if (copy_from_user(priv->vaddr + priv->bytes, buf, count)) - return -EFAULT; - - priv->bytes += count; - return count; -} - -static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count, - loff_t *f_pos) -{ - struct fpga_dev *priv = filp->private_data; - return simple_read_from_buffer(buf, count, f_pos, - priv->vaddr, priv->bytes); -} - -static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) -{ - struct fpga_dev *priv = filp->private_data; - - /* only read-only opens are allowed to seek */ - if ((filp->f_flags & O_ACCMODE) != O_RDONLY) - return -EINVAL; - - return fixed_size_llseek(filp, offset, origin, priv->fw_size); -} - -static const struct file_operations fpga_fops = { - .open = fpga_open, - .release = fpga_release, - .write = fpga_write, - .read = fpga_read, - .llseek = fpga_llseek, -}; - -/* - * Device Attributes - */ - -static ssize_t pfail_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - u8 val; - - val = ioread8(priv->regs + CTL_PWR_FAIL); - return snprintf(buf, PAGE_SIZE, "0x%.2x\n", val); -} - -static ssize_t pgood_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_good(priv)); -} - -static ssize_t penable_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_enabled(priv)); -} - -static ssize_t penable_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - unsigned long val; - int ret; - - ret = kstrtoul(buf, 0, &val); - if (ret) - return ret; - - if (val) { - ret = fpga_enable_power_supplies(priv); - if (ret) - return ret; - } else { - fpga_do_stop(priv); - fpga_disable_power_supplies(priv); - } - - return count; -} - -static ssize_t program_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", fpga_running(priv)); -} - -static ssize_t program_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct fpga_dev *priv = dev_get_drvdata(dev); - unsigned long val; - int ret; - - ret = kstrtoul(buf, 0, &val); - if (ret) - return ret; - - /* We can't have an image writer and be programming simultaneously */ - if (mutex_lock_interruptible(&priv->lock)) - return -ERESTARTSYS; - - /* Program or Reset the FPGA's */ - ret = val ? fpga_do_program(priv) : fpga_do_stop(priv); - if (ret) - goto out_unlock; - - /* Success */ - ret = count; - -out_unlock: - mutex_unlock(&priv->lock); - return ret; -} - -static DEVICE_ATTR(power_fail, S_IRUGO, pfail_show, NULL); -static DEVICE_ATTR(power_good, S_IRUGO, pgood_show, NULL); -static DEVICE_ATTR(power_enable, S_IRUGO | S_IWUSR, - penable_show, penable_store); - -static DEVICE_ATTR(program, S_IRUGO | S_IWUSR, - program_show, program_store); - -static struct attribute *fpga_attributes[] = { - &dev_attr_power_fail.attr, - &dev_attr_power_good.attr, - &dev_attr_power_enable.attr, - &dev_attr_program.attr, - NULL, -}; - -static const struct attribute_group fpga_attr_group = { - .attrs = fpga_attributes, -}; - -/* - * OpenFirmware Device Subsystem - */ - -#define SYS_REG_VERSION 0x00 -#define SYS_REG_GEOGRAPHIC 0x10 - -static bool dma_filter(struct dma_chan *chan, void *data) -{ - /* - * DMA Channel #0 is the only acceptable device - * - * This probably won't survive an unload/load cycle of the Freescale - * DMAEngine driver, but that won't be a problem - */ - return chan->chan_id == 0 && chan->device->dev_id == 0; -} - -static int fpga_of_remove(struct platform_device *op) -{ - struct fpga_dev *priv = platform_get_drvdata(op); - struct device *this_device = priv->miscdev.this_device; - - sysfs_remove_group(&this_device->kobj, &fpga_attr_group); - misc_deregister(&priv->miscdev); - - free_irq(priv->irq, priv); - irq_dispose_mapping(priv->irq); - - /* make sure the power supplies are off */ - fpga_disable_power_supplies(priv); - - /* unmap registers */ - iounmap(priv->immr); - iounmap(priv->regs); - - dma_release_channel(priv->chan); - - /* drop our reference to the private data structure */ - kref_put(&priv->ref, fpga_dev_remove); - return 0; -} - -/* CTL-CPLD Version Register */ -#define CTL_CPLD_VERSION 0x2000 - -static int fpga_of_probe(struct platform_device *op) -{ - struct device_node *of_node = op->dev.of_node; - struct device *this_device; - struct fpga_dev *priv; - dma_cap_mask_t mask; - u32 ver; - int ret; - - /* Allocate private data */ - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&op->dev, "Unable to allocate private data\n"); - ret = -ENOMEM; - goto out_return; - } - - /* Setup the miscdevice */ - priv->miscdev.minor = MISC_DYNAMIC_MINOR; - priv->miscdev.name = drv_name; - priv->miscdev.fops = &fpga_fops; - - kref_init(&priv->ref); - - platform_set_drvdata(op, priv); - priv->dev = &op->dev; - mutex_init(&priv->lock); - init_completion(&priv->completion); - - dev_set_drvdata(priv->dev, priv); - dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); - dma_cap_set(DMA_SLAVE, mask); - dma_cap_set(DMA_SG, mask); - - /* Get control of DMA channel #0 */ - priv->chan = dma_request_channel(mask, dma_filter, NULL); - if (!priv->chan) { - dev_err(&op->dev, "Unable to acquire DMA channel #0\n"); - ret = -ENODEV; - goto out_free_priv; - } - - /* Remap the registers for use */ - priv->regs = of_iomap(of_node, 0); - if (!priv->regs) { - dev_err(&op->dev, "Unable to ioremap registers\n"); - ret = -ENOMEM; - goto out_dma_release_channel; - } - - /* Remap the IMMR for use */ - priv->immr = ioremap(get_immrbase(), 0x100000); - if (!priv->immr) { - dev_err(&op->dev, "Unable to ioremap IMMR\n"); - ret = -ENOMEM; - goto out_unmap_regs; - } - - /* - * Check that external DMA is configured - * - * U-Boot does this for us, but we should check it and bail out if - * there is a problem. Failing to have this register setup correctly - * will cause the DMA controller to transfer a single cacheline - * worth of data, then wedge itself. - */ - if ((ioread32be(priv->immr + 0x114) & 0xE00) != 0xE00) { - dev_err(&op->dev, "External DMA control not configured\n"); - ret = -ENODEV; - goto out_unmap_immr; - } - - /* - * Check the CTL-CPLD version - * - * This driver uses the CTL-CPLD DATA-FPGA power sequencer, and we - * don't want to run on any version of the CTL-CPLD that does not use - * a compatible register layout. - * - * v2: changed register layout, added power sequencer - * v3: added glitch filter on the i2c overcurrent/overtemp outputs - */ - ver = ioread8(priv->regs + CTL_CPLD_VERSION); - if (ver != 0x02 && ver != 0x03) { - dev_err(&op->dev, "CTL-CPLD is not version 0x02 or 0x03!\n"); - ret = -ENODEV; - goto out_unmap_immr; - } - - /* Set the exact size that the firmware image should be */ - ver = ioread32be(priv->regs + SYS_REG_VERSION); - priv->fw_size = (ver & (1 << 18)) ? FW_SIZE_EP2S130 : FW_SIZE_EP2S90; - - /* Find the correct IRQ number */ - priv->irq = irq_of_parse_and_map(of_node, 0); - if (priv->irq == NO_IRQ) { - dev_err(&op->dev, "Unable to find IRQ line\n"); - ret = -ENODEV; - goto out_unmap_immr; - } - - /* Request the IRQ */ - ret = request_irq(priv->irq, fpga_irq, IRQF_SHARED, drv_name, priv); - if (ret) { - dev_err(&op->dev, "Unable to request IRQ %d\n", priv->irq); - ret = -ENODEV; - goto out_irq_dispose_mapping; - } - - /* Reset and stop the FPGA's, just in case */ - fpga_do_stop(priv); - - /* Register the miscdevice */ - ret = misc_register(&priv->miscdev); - if (ret) { - dev_err(&op->dev, "Unable to register miscdevice\n"); - goto out_free_irq; - } - - /* Create the sysfs files */ - this_device = priv->miscdev.this_device; - dev_set_drvdata(this_device, priv); - ret = sysfs_create_group(&this_device->kobj, &fpga_attr_group); - if (ret) { - dev_err(&op->dev, "Unable to create sysfs files\n"); - goto out_misc_deregister; - } - - dev_info(priv->dev, "CARMA FPGA Programmer: %s rev%s with %s FPGAs\n", - (ver & (1 << 17)) ? "Correlator" : "Digitizer", - (ver & (1 << 16)) ? "B" : "A", - (ver & (1 << 18)) ? "EP2S130" : "EP2S90"); - - return 0; - -out_misc_deregister: - misc_deregister(&priv->miscdev); -out_free_irq: - free_irq(priv->irq, priv); -out_irq_dispose_mapping: - irq_dispose_mapping(priv->irq); -out_unmap_immr: - iounmap(priv->immr); -out_unmap_regs: - iounmap(priv->regs); -out_dma_release_channel: - dma_release_channel(priv->chan); -out_free_priv: - kref_put(&priv->ref, fpga_dev_remove); -out_return: - return ret; -} - -static const struct of_device_id fpga_of_match[] = { - { .compatible = "carma,fpga-programmer", }, - {}, -}; - -static struct platform_driver fpga_of_driver = { - .probe = fpga_of_probe, - .remove = fpga_of_remove, - .driver = { - .name = drv_name, - .of_match_table = fpga_of_match, - }, -}; - -/* - * Module Init / Exit - */ - -static int __init fpga_init(void) -{ - led_trigger_register_simple("fpga", &ledtrig_fpga); - return platform_driver_register(&fpga_of_driver); -} - -static void __exit fpga_exit(void) -{ - platform_driver_unregister(&fpga_of_driver); - led_trigger_unregister_simple(ledtrig_fpga); -} - -MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); -MODULE_DESCRIPTION("CARMA Board DATA-FPGA Programmer"); -MODULE_LICENSE("GPL"); - -module_init(fpga_init); -module_exit(fpga_exit); diff --git a/kernel/drivers/misc/carma/carma-fpga.c b/kernel/drivers/misc/carma/carma-fpga.c deleted file mode 100644 index 5aba3fd78..000000000 --- a/kernel/drivers/misc/carma/carma-fpga.c +++ /dev/null @@ -1,1507 +0,0 @@ -/* - * CARMA DATA-FPGA Access Driver - * - * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/* - * FPGA Memory Dump Format - * - * FPGA #0 control registers (32 x 32-bit words) - * FPGA #1 control registers (32 x 32-bit words) - * FPGA #2 control registers (32 x 32-bit words) - * FPGA #3 control registers (32 x 32-bit words) - * SYSFPGA control registers (32 x 32-bit words) - * FPGA #0 correlation array (NUM_CORL0 correlation blocks) - * FPGA #1 correlation array (NUM_CORL1 correlation blocks) - * FPGA #2 correlation array (NUM_CORL2 correlation blocks) - * FPGA #3 correlation array (NUM_CORL3 correlation blocks) - * - * Each correlation array consists of: - * - * Correlation Data (2 x NUM_LAGSn x 32-bit words) - * Pipeline Metadata (2 x NUM_METAn x 32-bit words) - * Quantization Counters (2 x NUM_QCNTn x 32-bit words) - * - * The NUM_CORLn, NUM_LAGSn, NUM_METAn, and NUM_QCNTn values come from - * the FPGA configuration registers. They do not change once the FPGA's - * have been programmed, they only change on re-programming. - */ - -/* - * Basic Description: - * - * This driver is used to capture correlation spectra off of the four data - * processing FPGAs. The FPGAs are often reprogrammed at runtime, therefore - * this driver supports dynamic enable/disable of capture while the device - * remains open. - * - * The nominal capture rate is 64Hz (every 15.625ms). To facilitate this fast - * capture rate, all buffers are pre-allocated to avoid any potentially long - * running memory allocations while capturing. - * - * There are two lists and one pointer which are used to keep track of the - * different states of data buffers. - * - * 1) free list - * This list holds all empty data buffers which are ready to receive data. - * - * 2) inflight pointer - * This pointer holds the currently inflight data buffer. This buffer is having - * data copied into it by the DMA engine. - * - * 3) used list - * This list holds data buffers which have been filled, and are waiting to be - * read by userspace. - * - * All buffers start life on the free list, then move successively to the - * inflight pointer, and then to the used list. After they have been read by - * userspace, they are moved back to the free list. The cycle repeats as long - * as necessary. - * - * It should be noted that all buffers are mapped and ready for DMA when they - * are on any of the three lists. They are only unmapped when they are in the - * process of being read by userspace. - */ - -/* - * Notes on the IRQ masking scheme: - * - * The IRQ masking scheme here is different than most other hardware. The only - * way for the DATA-FPGAs to detect if the kernel has taken too long to copy - * the data is if the status registers are not cleared before the next - * correlation data dump is ready. - * - * The interrupt line is connected to the status registers, such that when they - * are cleared, the interrupt is de-asserted. Therein lies our problem. We need - * to schedule a long-running DMA operation and return from the interrupt - * handler quickly, but we cannot clear the status registers. - * - * To handle this, the system controller FPGA has the capability to connect the - * interrupt line to a user-controlled GPIO pin. This pin is driven high - * (unasserted) and left that way. To mask the interrupt, we change the - * interrupt source to the GPIO pin. Tada, we hid the interrupt. :) - */ - -#include <linux/of_address.h> -#include <linux/of_irq.h> -#include <linux/of_platform.h> -#include <linux/dma-mapping.h> -#include <linux/miscdevice.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/seq_file.h> -#include <linux/highmem.h> -#include <linux/debugfs.h> -#include <linux/vmalloc.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/poll.h> -#include <linux/slab.h> -#include <linux/kref.h> -#include <linux/io.h> - -/* system controller registers */ -#define SYS_IRQ_SOURCE_CTL 0x24 -#define SYS_IRQ_OUTPUT_EN 0x28 -#define SYS_IRQ_OUTPUT_DATA 0x2C -#define SYS_IRQ_INPUT_DATA 0x30 -#define SYS_FPGA_CONFIG_STATUS 0x44 - -/* GPIO IRQ line assignment */ -#define IRQ_CORL_DONE 0x10 - -/* FPGA registers */ -#define MMAP_REG_VERSION 0x00 -#define MMAP_REG_CORL_CONF1 0x08 -#define MMAP_REG_CORL_CONF2 0x0C -#define MMAP_REG_STATUS 0x48 - -#define SYS_FPGA_BLOCK 0xF0000000 - -#define DATA_FPGA_START 0x400000 -#define DATA_FPGA_SIZE 0x80000 - -static const char drv_name[] = "carma-fpga"; - -#define NUM_FPGA 4 - -#define MIN_DATA_BUFS 8 -#define MAX_DATA_BUFS 64 - -struct fpga_info { - unsigned int num_lag_ram; - unsigned int blk_size; -}; - -struct data_buf { - struct list_head entry; - void *vaddr; - struct scatterlist *sglist; - int sglen; - int nr_pages; - size_t size; -}; - -struct fpga_device { - /* character device */ - struct miscdevice miscdev; - struct device *dev; - struct mutex mutex; - - /* reference count */ - struct kref ref; - - /* FPGA registers and information */ - struct fpga_info info[NUM_FPGA]; - void __iomem *regs; - int irq; - - /* FPGA Physical Address/Size Information */ - resource_size_t phys_addr; - size_t phys_size; - - /* DMA structures */ - struct sg_table corl_table; - unsigned int corl_nents; - struct dma_chan *chan; - - /* Protection for all members below */ - spinlock_t lock; - - /* Device enable/disable flag */ - bool enabled; - - /* Correlation data buffers */ - wait_queue_head_t wait; - struct list_head free; - struct list_head used; - struct data_buf *inflight; - - /* Information about data buffers */ - unsigned int num_dropped; - unsigned int num_buffers; - size_t bufsize; - struct dentry *dbg_entry; -}; - -struct fpga_reader { - struct fpga_device *priv; - struct data_buf *buf; - off_t buf_start; -}; - -static void fpga_device_release(struct kref *ref) -{ - struct fpga_device *priv = container_of(ref, struct fpga_device, ref); - - /* the last reader has exited, cleanup the last bits */ - mutex_destroy(&priv->mutex); - kfree(priv); -} - -/* - * Data Buffer Allocation Helpers - */ - -static int carma_dma_init(struct data_buf *buf, int nr_pages) -{ - struct page *pg; - int i; - - buf->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT); - if (NULL == buf->vaddr) { - pr_debug("vmalloc_32(%d pages) failed\n", nr_pages); - return -ENOMEM; - } - - pr_debug("vmalloc is at addr 0x%08lx, size=%d\n", - (unsigned long)buf->vaddr, - nr_pages << PAGE_SHIFT); - - memset(buf->vaddr, 0, nr_pages << PAGE_SHIFT); - buf->nr_pages = nr_pages; - - buf->sglist = vzalloc(buf->nr_pages * sizeof(*buf->sglist)); - if (NULL == buf->sglist) - goto vzalloc_err; - - sg_init_table(buf->sglist, buf->nr_pages); - for (i = 0; i < buf->nr_pages; i++) { - pg = vmalloc_to_page(buf->vaddr + i * PAGE_SIZE); - if (NULL == pg) - goto vmalloc_to_page_err; - sg_set_page(&buf->sglist[i], pg, PAGE_SIZE, 0); - } - return 0; - -vmalloc_to_page_err: - vfree(buf->sglist); - buf->sglist = NULL; -vzalloc_err: - vfree(buf->vaddr); - buf->vaddr = NULL; - return -ENOMEM; -} - -static int carma_dma_map(struct device *dev, struct data_buf *buf) -{ - buf->sglen = dma_map_sg(dev, buf->sglist, - buf->nr_pages, DMA_FROM_DEVICE); - - if (0 == buf->sglen) { - pr_warn("%s: dma_map_sg failed\n", __func__); - return -ENOMEM; - } - return 0; -} - -static int carma_dma_unmap(struct device *dev, struct data_buf *buf) -{ - if (!buf->sglen) - return 0; - - dma_unmap_sg(dev, buf->sglist, buf->sglen, DMA_FROM_DEVICE); - buf->sglen = 0; - return 0; -} - -/** - * data_free_buffer() - free a single data buffer and all allocated memory - * @buf: the buffer to free - * - * This will free all of the pages allocated to the given data buffer, and - * then free the structure itself - */ -static void data_free_buffer(struct data_buf *buf) -{ - /* It is ok to free a NULL buffer */ - if (!buf) - return; - - /* free all memory */ - vfree(buf->sglist); - vfree(buf->vaddr); - kfree(buf); -} - -/** - * data_alloc_buffer() - allocate and fill a data buffer with pages - * @bytes: the number of bytes required - * - * This allocates all space needed for a data buffer. It must be mapped before - * use in a DMA transaction using carma_dma_map(). - * - * Returns NULL on failure - */ -static struct data_buf *data_alloc_buffer(const size_t bytes) -{ - unsigned int nr_pages; - struct data_buf *buf; - int ret; - - /* calculate the number of pages necessary */ - nr_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); - - /* allocate the buffer structure */ - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - goto out_return; - - /* initialize internal fields */ - INIT_LIST_HEAD(&buf->entry); - buf->size = bytes; - - /* allocate the buffer */ - ret = carma_dma_init(buf, nr_pages); - if (ret) - goto out_free_buf; - - return buf; - -out_free_buf: - kfree(buf); -out_return: - return NULL; -} - -/** - * data_free_buffers() - free all allocated buffers - * @priv: the driver's private data structure - * - * Free all buffers allocated by the driver (except those currently in the - * process of being read by userspace). - * - * LOCKING: must hold dev->mutex - * CONTEXT: user - */ -static void data_free_buffers(struct fpga_device *priv) -{ - struct data_buf *buf, *tmp; - - /* the device should be stopped, no DMA in progress */ - BUG_ON(priv->inflight != NULL); - - list_for_each_entry_safe(buf, tmp, &priv->free, entry) { - list_del_init(&buf->entry); - carma_dma_unmap(priv->dev, buf); - data_free_buffer(buf); - } - - list_for_each_entry_safe(buf, tmp, &priv->used, entry) { - list_del_init(&buf->entry); - carma_dma_unmap(priv->dev, buf); - data_free_buffer(buf); - } - - priv->num_buffers = 0; - priv->bufsize = 0; -} - -/** - * data_alloc_buffers() - allocate 1 seconds worth of data buffers - * @priv: the driver's private data structure - * - * Allocate enough buffers for a whole second worth of data - * - * This routine will attempt to degrade nicely by succeeding even if a full - * second worth of data buffers could not be allocated, as long as a minimum - * number were allocated. In this case, it will print a message to the kernel - * log. - * - * The device must not be modifying any lists when this is called. - * - * CONTEXT: user - * LOCKING: must hold dev->mutex - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_alloc_buffers(struct fpga_device *priv) -{ - struct data_buf *buf; - int i, ret; - - for (i = 0; i < MAX_DATA_BUFS; i++) { - - /* allocate a buffer */ - buf = data_alloc_buffer(priv->bufsize); - if (!buf) - break; - - /* map it for DMA */ - ret = carma_dma_map(priv->dev, buf); - if (ret) { - data_free_buffer(buf); - break; - } - - /* add it to the list of free buffers */ - list_add_tail(&buf->entry, &priv->free); - priv->num_buffers++; - } - - /* Make sure we allocated the minimum required number of buffers */ - if (priv->num_buffers < MIN_DATA_BUFS) { - dev_err(priv->dev, "Unable to allocate enough data buffers\n"); - data_free_buffers(priv); - return -ENOMEM; - } - - /* Warn if we are running in a degraded state, but do not fail */ - if (priv->num_buffers < MAX_DATA_BUFS) { - dev_warn(priv->dev, - "Unable to allocate %d buffers, using %d buffers instead\n", - MAX_DATA_BUFS, i); - } - - return 0; -} - -/* - * DMA Operations Helpers - */ - -/** - * fpga_start_addr() - get the physical address a DATA-FPGA - * @priv: the driver's private data structure - * @fpga: the DATA-FPGA number (zero based) - */ -static dma_addr_t fpga_start_addr(struct fpga_device *priv, unsigned int fpga) -{ - return priv->phys_addr + 0x400000 + (0x80000 * fpga); -} - -/** - * fpga_block_addr() - get the physical address of a correlation data block - * @priv: the driver's private data structure - * @fpga: the DATA-FPGA number (zero based) - * @blknum: the correlation block number (zero based) - */ -static dma_addr_t fpga_block_addr(struct fpga_device *priv, unsigned int fpga, - unsigned int blknum) -{ - return fpga_start_addr(priv, fpga) + (0x10000 * (1 + blknum)); -} - -#define REG_BLOCK_SIZE (32 * 4) - -/** - * data_setup_corl_table() - create the scatterlist for correlation dumps - * @priv: the driver's private data structure - * - * Create the scatterlist for transferring a correlation dump from the - * DATA FPGAs. This structure will be reused for each buffer than needs - * to be filled with correlation data. - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_setup_corl_table(struct fpga_device *priv) -{ - struct sg_table *table = &priv->corl_table; - struct scatterlist *sg; - struct fpga_info *info; - int i, j, ret; - - /* Calculate the number of entries needed */ - priv->corl_nents = (1 + NUM_FPGA) * REG_BLOCK_SIZE; - for (i = 0; i < NUM_FPGA; i++) - priv->corl_nents += priv->info[i].num_lag_ram; - - /* Allocate the scatterlist table */ - ret = sg_alloc_table(table, priv->corl_nents, GFP_KERNEL); - if (ret) { - dev_err(priv->dev, "unable to allocate DMA table\n"); - return ret; - } - - /* Add the DATA FPGA registers to the scatterlist */ - sg = table->sgl; - for (i = 0; i < NUM_FPGA; i++) { - sg_dma_address(sg) = fpga_start_addr(priv, i); - sg_dma_len(sg) = REG_BLOCK_SIZE; - sg = sg_next(sg); - } - - /* Add the SYS-FPGA registers to the scatterlist */ - sg_dma_address(sg) = SYS_FPGA_BLOCK; - sg_dma_len(sg) = REG_BLOCK_SIZE; - sg = sg_next(sg); - - /* Add the FPGA correlation data blocks to the scatterlist */ - for (i = 0; i < NUM_FPGA; i++) { - info = &priv->info[i]; - for (j = 0; j < info->num_lag_ram; j++) { - sg_dma_address(sg) = fpga_block_addr(priv, i, j); - sg_dma_len(sg) = info->blk_size; - sg = sg_next(sg); - } - } - - /* - * All physical addresses and lengths are present in the structure - * now. It can be reused for every FPGA DATA interrupt - */ - return 0; -} - -/* - * FPGA Register Access Helpers - */ - -static void fpga_write_reg(struct fpga_device *priv, unsigned int fpga, - unsigned int reg, u32 val) -{ - const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE); - iowrite32be(val, priv->regs + fpga_start + reg); -} - -static u32 fpga_read_reg(struct fpga_device *priv, unsigned int fpga, - unsigned int reg) -{ - const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE); - return ioread32be(priv->regs + fpga_start + reg); -} - -/** - * data_calculate_bufsize() - calculate the data buffer size required - * @priv: the driver's private data structure - * - * Calculate the total buffer size needed to hold a single block - * of correlation data - * - * CONTEXT: user - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_calculate_bufsize(struct fpga_device *priv) -{ - u32 num_corl, num_lags, num_meta, num_qcnt, num_pack; - u32 conf1, conf2, version; - u32 num_lag_ram, blk_size; - int i; - - /* Each buffer starts with the 5 FPGA register areas */ - priv->bufsize = (1 + NUM_FPGA) * REG_BLOCK_SIZE; - - /* Read and store the configuration data for each FPGA */ - for (i = 0; i < NUM_FPGA; i++) { - version = fpga_read_reg(priv, i, MMAP_REG_VERSION); - conf1 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF1); - conf2 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF2); - - /* minor version 2 and later */ - if ((version & 0x000000FF) >= 2) { - num_corl = (conf1 & 0x000000F0) >> 4; - num_pack = (conf1 & 0x00000F00) >> 8; - num_lags = (conf1 & 0x00FFF000) >> 12; - num_meta = (conf1 & 0x7F000000) >> 24; - num_qcnt = (conf2 & 0x00000FFF) >> 0; - } else { - num_corl = (conf1 & 0x000000F0) >> 4; - num_pack = 1; /* implied */ - num_lags = (conf1 & 0x000FFF00) >> 8; - num_meta = (conf1 & 0x7FF00000) >> 20; - num_qcnt = (conf2 & 0x00000FFF) >> 0; - } - - num_lag_ram = (num_corl + num_pack - 1) / num_pack; - blk_size = ((num_pack * num_lags) + num_meta + num_qcnt) * 8; - - priv->info[i].num_lag_ram = num_lag_ram; - priv->info[i].blk_size = blk_size; - priv->bufsize += num_lag_ram * blk_size; - - dev_dbg(priv->dev, "FPGA %d NUM_CORL: %d\n", i, num_corl); - dev_dbg(priv->dev, "FPGA %d NUM_PACK: %d\n", i, num_pack); - dev_dbg(priv->dev, "FPGA %d NUM_LAGS: %d\n", i, num_lags); - dev_dbg(priv->dev, "FPGA %d NUM_META: %d\n", i, num_meta); - dev_dbg(priv->dev, "FPGA %d NUM_QCNT: %d\n", i, num_qcnt); - dev_dbg(priv->dev, "FPGA %d BLK_SIZE: %d\n", i, blk_size); - } - - dev_dbg(priv->dev, "TOTAL BUFFER SIZE: %zu bytes\n", priv->bufsize); - return 0; -} - -/* - * Interrupt Handling - */ - -/** - * data_disable_interrupts() - stop the device from generating interrupts - * @priv: the driver's private data structure - * - * Hide interrupts by switching to GPIO interrupt source - * - * LOCKING: must hold dev->lock - */ -static void data_disable_interrupts(struct fpga_device *priv) -{ - /* hide the interrupt by switching the IRQ driver to GPIO */ - iowrite32be(0x2F, priv->regs + SYS_IRQ_SOURCE_CTL); -} - -/** - * data_enable_interrupts() - allow the device to generate interrupts - * @priv: the driver's private data structure - * - * Unhide interrupts by switching to the FPGA interrupt source. At the - * same time, clear the DATA-FPGA status registers. - * - * LOCKING: must hold dev->lock - */ -static void data_enable_interrupts(struct fpga_device *priv) -{ - /* clear the actual FPGA corl_done interrupt */ - fpga_write_reg(priv, 0, MMAP_REG_STATUS, 0x0); - fpga_write_reg(priv, 1, MMAP_REG_STATUS, 0x0); - fpga_write_reg(priv, 2, MMAP_REG_STATUS, 0x0); - fpga_write_reg(priv, 3, MMAP_REG_STATUS, 0x0); - - /* flush the writes */ - fpga_read_reg(priv, 0, MMAP_REG_STATUS); - fpga_read_reg(priv, 1, MMAP_REG_STATUS); - fpga_read_reg(priv, 2, MMAP_REG_STATUS); - fpga_read_reg(priv, 3, MMAP_REG_STATUS); - - /* switch back to the external interrupt source */ - iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL); -} - -/** - * data_dma_cb() - DMAEngine callback for DMA completion - * @data: the driver's private data structure - * - * Complete a DMA transfer from the DATA-FPGA's - * - * This is called via the DMA callback mechanism, and will handle moving the - * completed DMA transaction to the used list, and then wake any processes - * waiting for new data - * - * CONTEXT: any, softirq expected - */ -static void data_dma_cb(void *data) -{ - struct fpga_device *priv = data; - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); - - /* If there is no inflight buffer, we've got a bug */ - BUG_ON(priv->inflight == NULL); - - /* Move the inflight buffer onto the used list */ - list_move_tail(&priv->inflight->entry, &priv->used); - priv->inflight = NULL; - - /* - * If data dumping is still enabled, then clear the FPGA - * status registers and re-enable FPGA interrupts - */ - if (priv->enabled) - data_enable_interrupts(priv); - - spin_unlock_irqrestore(&priv->lock, flags); - - /* - * We've changed both the inflight and used lists, so we need - * to wake up any processes that are blocking for those events - */ - wake_up(&priv->wait); -} - -/** - * data_submit_dma() - prepare and submit the required DMA to fill a buffer - * @priv: the driver's private data structure - * @buf: the data buffer - * - * Prepare and submit the necessary DMA transactions to fill a correlation - * data buffer. - * - * LOCKING: must hold dev->lock - * CONTEXT: hardirq only - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf) -{ - struct scatterlist *dst_sg, *src_sg; - unsigned int dst_nents, src_nents; - struct dma_chan *chan = priv->chan; - struct dma_async_tx_descriptor *tx; - dma_cookie_t cookie; - dma_addr_t dst, src; - unsigned long dma_flags = 0; - - dst_sg = buf->sglist; - dst_nents = buf->sglen; - - src_sg = priv->corl_table.sgl; - src_nents = priv->corl_nents; - - /* - * All buffers passed to this function should be ready and mapped - * for DMA already. Therefore, we don't need to do anything except - * submit it to the Freescale DMA Engine for processing - */ - - /* setup the scatterlist to scatterlist transfer */ - tx = chan->device->device_prep_dma_sg(chan, - dst_sg, dst_nents, - src_sg, src_nents, - 0); - if (!tx) { - dev_err(priv->dev, "unable to prep scatterlist DMA\n"); - return -ENOMEM; - } - - /* submit the transaction to the DMA controller */ - cookie = tx->tx_submit(tx); - if (dma_submit_error(cookie)) { - dev_err(priv->dev, "unable to submit scatterlist DMA\n"); - return -ENOMEM; - } - - /* Prepare the re-read of the SYS-FPGA block */ - dst = sg_dma_address(dst_sg) + (NUM_FPGA * REG_BLOCK_SIZE); - src = SYS_FPGA_BLOCK; - tx = chan->device->device_prep_dma_memcpy(chan, dst, src, - REG_BLOCK_SIZE, - dma_flags); - if (!tx) { - dev_err(priv->dev, "unable to prep SYS-FPGA DMA\n"); - return -ENOMEM; - } - - /* Setup the callback */ - tx->callback = data_dma_cb; - tx->callback_param = priv; - - /* submit the transaction to the DMA controller */ - cookie = tx->tx_submit(tx); - if (dma_submit_error(cookie)) { - dev_err(priv->dev, "unable to submit SYS-FPGA DMA\n"); - return -ENOMEM; - } - - return 0; -} - -#define CORL_DONE 0x1 -#define CORL_ERR 0x2 - -static irqreturn_t data_irq(int irq, void *dev_id) -{ - struct fpga_device *priv = dev_id; - bool submitted = false; - struct data_buf *buf; - u32 status; - int i; - - /* detect spurious interrupts via FPGA status */ - for (i = 0; i < 4; i++) { - status = fpga_read_reg(priv, i, MMAP_REG_STATUS); - if (!(status & (CORL_DONE | CORL_ERR))) { - dev_err(priv->dev, "spurious irq detected (FPGA)\n"); - return IRQ_NONE; - } - } - - /* detect spurious interrupts via raw IRQ pin readback */ - status = ioread32be(priv->regs + SYS_IRQ_INPUT_DATA); - if (status & IRQ_CORL_DONE) { - dev_err(priv->dev, "spurious irq detected (IRQ)\n"); - return IRQ_NONE; - } - - spin_lock(&priv->lock); - - /* - * This is an error case that should never happen. - * - * If this driver has a bug and manages to re-enable interrupts while - * a DMA is in progress, then we will hit this statement and should - * start paying attention immediately. - */ - BUG_ON(priv->inflight != NULL); - - /* hide the interrupt by switching the IRQ driver to GPIO */ - data_disable_interrupts(priv); - - /* If there are no free buffers, drop this data */ - if (list_empty(&priv->free)) { - priv->num_dropped++; - goto out; - } - - buf = list_first_entry(&priv->free, struct data_buf, entry); - list_del_init(&buf->entry); - BUG_ON(buf->size != priv->bufsize); - - /* Submit a DMA transfer to get the correlation data */ - if (data_submit_dma(priv, buf)) { - dev_err(priv->dev, "Unable to setup DMA transfer\n"); - list_move_tail(&buf->entry, &priv->free); - goto out; - } - - /* Save the buffer for the DMA callback */ - priv->inflight = buf; - submitted = true; - - /* Start the DMA Engine */ - dma_async_issue_pending(priv->chan); - -out: - /* If no DMA was submitted, re-enable interrupts */ - if (!submitted) - data_enable_interrupts(priv); - - spin_unlock(&priv->lock); - return IRQ_HANDLED; -} - -/* - * Realtime Device Enable Helpers - */ - -/** - * data_device_enable() - enable the device for buffered dumping - * @priv: the driver's private data structure - * - * Enable the device for buffered dumping. Allocates buffers and hooks up - * the interrupt handler. When this finishes, data will come pouring in. - * - * LOCKING: must hold dev->mutex - * CONTEXT: user context only - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_device_enable(struct fpga_device *priv) -{ - bool enabled; - u32 val; - int ret; - - /* multiple enables are safe: they do nothing */ - spin_lock_irq(&priv->lock); - enabled = priv->enabled; - spin_unlock_irq(&priv->lock); - if (enabled) - return 0; - - /* check that the FPGAs are programmed */ - val = ioread32be(priv->regs + SYS_FPGA_CONFIG_STATUS); - if (!(val & (1 << 18))) { - dev_err(priv->dev, "DATA-FPGAs are not enabled\n"); - return -ENODATA; - } - - /* read the FPGAs to calculate the buffer size */ - ret = data_calculate_bufsize(priv); - if (ret) { - dev_err(priv->dev, "unable to calculate buffer size\n"); - goto out_error; - } - - /* allocate the correlation data buffers */ - ret = data_alloc_buffers(priv); - if (ret) { - dev_err(priv->dev, "unable to allocate buffers\n"); - goto out_error; - } - - /* setup the source scatterlist for dumping correlation data */ - ret = data_setup_corl_table(priv); - if (ret) { - dev_err(priv->dev, "unable to setup correlation DMA table\n"); - goto out_error; - } - - /* prevent the FPGAs from generating interrupts */ - data_disable_interrupts(priv); - - /* hookup the irq handler */ - ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv); - if (ret) { - dev_err(priv->dev, "unable to request IRQ handler\n"); - goto out_error; - } - - /* allow the DMA callback to re-enable FPGA interrupts */ - spin_lock_irq(&priv->lock); - priv->enabled = true; - spin_unlock_irq(&priv->lock); - - /* allow the FPGAs to generate interrupts */ - data_enable_interrupts(priv); - return 0; - -out_error: - sg_free_table(&priv->corl_table); - priv->corl_nents = 0; - - data_free_buffers(priv); - return ret; -} - -/** - * data_device_disable() - disable the device for buffered dumping - * @priv: the driver's private data structure - * - * Disable the device for buffered dumping. Stops new DMA transactions from - * being generated, waits for all outstanding DMA to complete, and then frees - * all buffers. - * - * LOCKING: must hold dev->mutex - * CONTEXT: user only - * - * Returns 0 on success, -ERRNO otherwise - */ -static int data_device_disable(struct fpga_device *priv) -{ - spin_lock_irq(&priv->lock); - - /* allow multiple disable */ - if (!priv->enabled) { - spin_unlock_irq(&priv->lock); - return 0; - } - - /* - * Mark the device disabled - * - * This stops DMA callbacks from re-enabling interrupts - */ - priv->enabled = false; - - /* prevent the FPGAs from generating interrupts */ - data_disable_interrupts(priv); - - /* wait until all ongoing DMA has finished */ - while (priv->inflight != NULL) { - spin_unlock_irq(&priv->lock); - wait_event(priv->wait, priv->inflight == NULL); - spin_lock_irq(&priv->lock); - } - - spin_unlock_irq(&priv->lock); - - /* unhook the irq handler */ - free_irq(priv->irq, priv); - - /* free the correlation table */ - sg_free_table(&priv->corl_table); - priv->corl_nents = 0; - - /* free all buffers: the free and used lists are not being changed */ - data_free_buffers(priv); - return 0; -} - -/* - * DEBUGFS Interface - */ -#ifdef CONFIG_DEBUG_FS - -/* - * Count the number of entries in the given list - */ -static unsigned int list_num_entries(struct list_head *list) -{ - struct list_head *entry; - unsigned int ret = 0; - - list_for_each(entry, list) - ret++; - - return ret; -} - -static int data_debug_show(struct seq_file *f, void *offset) -{ - struct fpga_device *priv = f->private; - - spin_lock_irq(&priv->lock); - - seq_printf(f, "enabled: %d\n", priv->enabled); - seq_printf(f, "bufsize: %d\n", priv->bufsize); - seq_printf(f, "num_buffers: %d\n", priv->num_buffers); - seq_printf(f, "num_free: %d\n", list_num_entries(&priv->free)); - seq_printf(f, "inflight: %d\n", priv->inflight != NULL); - seq_printf(f, "num_used: %d\n", list_num_entries(&priv->used)); - seq_printf(f, "num_dropped: %d\n", priv->num_dropped); - - spin_unlock_irq(&priv->lock); - return 0; -} - -static int data_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, data_debug_show, inode->i_private); -} - -static const struct file_operations data_debug_fops = { - .owner = THIS_MODULE, - .open = data_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int data_debugfs_init(struct fpga_device *priv) -{ - priv->dbg_entry = debugfs_create_file(drv_name, S_IRUGO, NULL, priv, - &data_debug_fops); - return PTR_ERR_OR_ZERO(priv->dbg_entry); -} - -static void data_debugfs_exit(struct fpga_device *priv) -{ - debugfs_remove(priv->dbg_entry); -} - -#else - -static inline int data_debugfs_init(struct fpga_device *priv) -{ - return 0; -} - -static inline void data_debugfs_exit(struct fpga_device *priv) -{ -} - -#endif /* CONFIG_DEBUG_FS */ - -/* - * SYSFS Attributes - */ - -static ssize_t data_en_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct fpga_device *priv = dev_get_drvdata(dev); - int ret; - - spin_lock_irq(&priv->lock); - ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled); - spin_unlock_irq(&priv->lock); - - return ret; -} - -static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct fpga_device *priv = dev_get_drvdata(dev); - unsigned long enable; - int ret; - - ret = kstrtoul(buf, 0, &enable); - if (ret) { - dev_err(priv->dev, "unable to parse enable input\n"); - return ret; - } - - /* protect against concurrent enable/disable */ - ret = mutex_lock_interruptible(&priv->mutex); - if (ret) - return ret; - - if (enable) - ret = data_device_enable(priv); - else - ret = data_device_disable(priv); - - if (ret) { - dev_err(priv->dev, "device %s failed\n", - enable ? "enable" : "disable"); - count = ret; - goto out_unlock; - } - -out_unlock: - mutex_unlock(&priv->mutex); - return count; -} - -static DEVICE_ATTR(enable, S_IWUSR | S_IRUGO, data_en_show, data_en_set); - -static struct attribute *data_sysfs_attrs[] = { - &dev_attr_enable.attr, - NULL, -}; - -static const struct attribute_group rt_sysfs_attr_group = { - .attrs = data_sysfs_attrs, -}; - -/* - * FPGA Realtime Data Character Device - */ - -static int data_open(struct inode *inode, struct file *filp) -{ - /* - * The miscdevice layer puts our struct miscdevice into the - * filp->private_data field. We use this to find our private - * data and then overwrite it with our own private structure. - */ - struct fpga_device *priv = container_of(filp->private_data, - struct fpga_device, miscdev); - struct fpga_reader *reader; - int ret; - - /* allocate private data */ - reader = kzalloc(sizeof(*reader), GFP_KERNEL); - if (!reader) - return -ENOMEM; - - reader->priv = priv; - reader->buf = NULL; - - filp->private_data = reader; - ret = nonseekable_open(inode, filp); - if (ret) { - dev_err(priv->dev, "nonseekable-open failed\n"); - kfree(reader); - return ret; - } - - /* - * success, increase the reference count of the private data structure - * so that it doesn't disappear if the device is unbound - */ - kref_get(&priv->ref); - return 0; -} - -static int data_release(struct inode *inode, struct file *filp) -{ - struct fpga_reader *reader = filp->private_data; - struct fpga_device *priv = reader->priv; - - /* free the per-reader structure */ - data_free_buffer(reader->buf); - kfree(reader); - filp->private_data = NULL; - - /* decrement our reference count to the private data */ - kref_put(&priv->ref, fpga_device_release); - return 0; -} - -static ssize_t data_read(struct file *filp, char __user *ubuf, size_t count, - loff_t *f_pos) -{ - struct fpga_reader *reader = filp->private_data; - struct fpga_device *priv = reader->priv; - struct list_head *used = &priv->used; - bool drop_buffer = false; - struct data_buf *dbuf; - size_t avail; - void *data; - int ret; - - /* check if we already have a partial buffer */ - if (reader->buf) { - dbuf = reader->buf; - goto have_buffer; - } - - spin_lock_irq(&priv->lock); - - /* Block until there is at least one buffer on the used list */ - while (list_empty(used)) { - spin_unlock_irq(&priv->lock); - - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - ret = wait_event_interruptible(priv->wait, !list_empty(used)); - if (ret) - return ret; - - spin_lock_irq(&priv->lock); - } - - /* Grab the first buffer off of the used list */ - dbuf = list_first_entry(used, struct data_buf, entry); - list_del_init(&dbuf->entry); - - spin_unlock_irq(&priv->lock); - - /* Buffers are always mapped: unmap it */ - carma_dma_unmap(priv->dev, dbuf); - - /* save the buffer for later */ - reader->buf = dbuf; - reader->buf_start = 0; - -have_buffer: - /* Get the number of bytes available */ - avail = dbuf->size - reader->buf_start; - data = dbuf->vaddr + reader->buf_start; - - /* Get the number of bytes we can transfer */ - count = min(count, avail); - - /* Copy the data to the userspace buffer */ - if (copy_to_user(ubuf, data, count)) - return -EFAULT; - - /* Update the amount of available space */ - avail -= count; - - /* - * If there is still some data available, save the buffer for the - * next userspace call to read() and return - */ - if (avail > 0) { - reader->buf_start += count; - reader->buf = dbuf; - return count; - } - - /* - * Get the buffer ready to be reused for DMA - * - * If it fails, we pretend that the read never happed and return - * -EFAULT to userspace. The read will be retried. - */ - ret = carma_dma_map(priv->dev, dbuf); - if (ret) { - dev_err(priv->dev, "unable to remap buffer for DMA\n"); - return -EFAULT; - } - - /* Lock against concurrent enable/disable */ - spin_lock_irq(&priv->lock); - - /* the reader is finished with this buffer */ - reader->buf = NULL; - - /* - * One of two things has happened, the device is disabled, or the - * device has been reconfigured underneath us. In either case, we - * should just throw away the buffer. - * - * Lockdep complains if this is done under the spinlock, so we - * handle it during the unlock path. - */ - if (!priv->enabled || dbuf->size != priv->bufsize) { - drop_buffer = true; - goto out_unlock; - } - - /* The buffer is safe to reuse, so add it back to the free list */ - list_add_tail(&dbuf->entry, &priv->free); - -out_unlock: - spin_unlock_irq(&priv->lock); - - if (drop_buffer) { - carma_dma_unmap(priv->dev, dbuf); - data_free_buffer(dbuf); - } - - return count; -} - -static unsigned int data_poll(struct file *filp, struct poll_table_struct *tbl) -{ - struct fpga_reader *reader = filp->private_data; - struct fpga_device *priv = reader->priv; - unsigned int mask = 0; - - poll_wait(filp, &priv->wait, tbl); - - if (!list_empty(&priv->used)) - mask |= POLLIN | POLLRDNORM; - - return mask; -} - -static int data_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct fpga_reader *reader = filp->private_data; - struct fpga_device *priv = reader->priv; - unsigned long offset, vsize, psize, addr; - - /* VMA properties */ - offset = vma->vm_pgoff << PAGE_SHIFT; - vsize = vma->vm_end - vma->vm_start; - psize = priv->phys_size - offset; - addr = (priv->phys_addr + offset) >> PAGE_SHIFT; - - /* Check against the FPGA region's physical memory size */ - if (vsize > psize) { - dev_err(priv->dev, "requested mmap mapping too large\n"); - return -EINVAL; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - return io_remap_pfn_range(vma, vma->vm_start, addr, vsize, - vma->vm_page_prot); -} - -static const struct file_operations data_fops = { - .owner = THIS_MODULE, - .open = data_open, - .release = data_release, - .read = data_read, - .poll = data_poll, - .mmap = data_mmap, - .llseek = no_llseek, -}; - -/* - * OpenFirmware Device Subsystem - */ - -static bool dma_filter(struct dma_chan *chan, void *data) -{ - /* - * DMA Channel #0 is used for the FPGA Programmer, so ignore it - * - * This probably won't survive an unload/load cycle of the Freescale - * DMAEngine driver, but that won't be a problem - */ - if (chan->chan_id == 0 && chan->device->dev_id == 0) - return false; - - return true; -} - -static int data_of_probe(struct platform_device *op) -{ - struct device_node *of_node = op->dev.of_node; - struct device *this_device; - struct fpga_device *priv; - struct resource res; - dma_cap_mask_t mask; - int ret; - - /* Allocate private data */ - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&op->dev, "Unable to allocate device private data\n"); - ret = -ENOMEM; - goto out_return; - } - - platform_set_drvdata(op, priv); - priv->dev = &op->dev; - kref_init(&priv->ref); - mutex_init(&priv->mutex); - - dev_set_drvdata(priv->dev, priv); - spin_lock_init(&priv->lock); - INIT_LIST_HEAD(&priv->free); - INIT_LIST_HEAD(&priv->used); - init_waitqueue_head(&priv->wait); - - /* Setup the misc device */ - priv->miscdev.minor = MISC_DYNAMIC_MINOR; - priv->miscdev.name = drv_name; - priv->miscdev.fops = &data_fops; - - /* Get the physical address of the FPGA registers */ - ret = of_address_to_resource(of_node, 0, &res); - if (ret) { - dev_err(&op->dev, "Unable to find FPGA physical address\n"); - ret = -ENODEV; - goto out_free_priv; - } - - priv->phys_addr = res.start; - priv->phys_size = resource_size(&res); - - /* ioremap the registers for use */ - priv->regs = of_iomap(of_node, 0); - if (!priv->regs) { - dev_err(&op->dev, "Unable to ioremap registers\n"); - ret = -ENOMEM; - goto out_free_priv; - } - - dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); - dma_cap_set(DMA_INTERRUPT, mask); - dma_cap_set(DMA_SLAVE, mask); - dma_cap_set(DMA_SG, mask); - - /* Request a DMA channel */ - priv->chan = dma_request_channel(mask, dma_filter, NULL); - if (!priv->chan) { - dev_err(&op->dev, "Unable to request DMA channel\n"); - ret = -ENODEV; - goto out_unmap_regs; - } - - /* Find the correct IRQ number */ - priv->irq = irq_of_parse_and_map(of_node, 0); - if (priv->irq == NO_IRQ) { - dev_err(&op->dev, "Unable to find IRQ line\n"); - ret = -ENODEV; - goto out_release_dma; - } - - /* Drive the GPIO for FPGA IRQ high (no interrupt) */ - iowrite32be(IRQ_CORL_DONE, priv->regs + SYS_IRQ_OUTPUT_DATA); - - /* Register the miscdevice */ - ret = misc_register(&priv->miscdev); - if (ret) { - dev_err(&op->dev, "Unable to register miscdevice\n"); - goto out_irq_dispose_mapping; - } - - /* Create the debugfs files */ - ret = data_debugfs_init(priv); - if (ret) { - dev_err(&op->dev, "Unable to create debugfs files\n"); - goto out_misc_deregister; - } - - /* Create the sysfs files */ - this_device = priv->miscdev.this_device; - dev_set_drvdata(this_device, priv); - ret = sysfs_create_group(&this_device->kobj, &rt_sysfs_attr_group); - if (ret) { - dev_err(&op->dev, "Unable to create sysfs files\n"); - goto out_data_debugfs_exit; - } - - dev_info(&op->dev, "CARMA FPGA Realtime Data Driver Loaded\n"); - return 0; - -out_data_debugfs_exit: - data_debugfs_exit(priv); -out_misc_deregister: - misc_deregister(&priv->miscdev); -out_irq_dispose_mapping: - irq_dispose_mapping(priv->irq); -out_release_dma: - dma_release_channel(priv->chan); -out_unmap_regs: - iounmap(priv->regs); -out_free_priv: - kref_put(&priv->ref, fpga_device_release); -out_return: - return ret; -} - -static int data_of_remove(struct platform_device *op) -{ - struct fpga_device *priv = platform_get_drvdata(op); - struct device *this_device = priv->miscdev.this_device; - - /* remove all sysfs files, now the device cannot be re-enabled */ - sysfs_remove_group(&this_device->kobj, &rt_sysfs_attr_group); - - /* remove all debugfs files */ - data_debugfs_exit(priv); - - /* disable the device from generating data */ - data_device_disable(priv); - - /* remove the character device to stop new readers from appearing */ - misc_deregister(&priv->miscdev); - - /* cleanup everything not needed by readers */ - irq_dispose_mapping(priv->irq); - dma_release_channel(priv->chan); - iounmap(priv->regs); - - /* release our reference */ - kref_put(&priv->ref, fpga_device_release); - return 0; -} - -static const struct of_device_id data_of_match[] = { - { .compatible = "carma,carma-fpga", }, - {}, -}; - -static struct platform_driver data_of_driver = { - .probe = data_of_probe, - .remove = data_of_remove, - .driver = { - .name = drv_name, - .of_match_table = data_of_match, - }, -}; - -module_platform_driver(data_of_driver); - -MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>"); -MODULE_DESCRIPTION("CARMA DATA-FPGA Access Driver"); -MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/cxl/Kconfig b/kernel/drivers/misc/cxl/Kconfig index a990b39b4..8756d06e2 100644 --- a/kernel/drivers/misc/cxl/Kconfig +++ b/kernel/drivers/misc/cxl/Kconfig @@ -7,10 +7,20 @@ config CXL_BASE default n select PPC_COPRO_BASE +config CXL_KERNEL_API + bool + default n + +config CXL_EEH + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" - depends on PPC_POWERNV && PCI_MSI + depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE + select CXL_KERNEL_API + select CXL_EEH default m help Select this option to enable driver support for IBM Coherent diff --git a/kernel/drivers/misc/cxl/Makefile b/kernel/drivers/misc/cxl/Makefile index edb494d3f..6982f603f 100644 --- a/kernel/drivers/misc/cxl/Makefile +++ b/kernel/drivers/misc/cxl/Makefile @@ -1,4 +1,8 @@ -cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o +ccflags-y := -Werror -Wno-unused-const-variable + +cxl-y += main.o file.o irq.o fault.o native.o +cxl-y += context.o sysfs.o debugfs.o pci.o trace.o +cxl-y += vphb.o api.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/kernel/drivers/misc/cxl/api.c b/kernel/drivers/misc/cxl/api.c new file mode 100644 index 000000000..103baf0e0 --- /dev/null +++ b/kernel/drivers/misc/cxl/api.c @@ -0,0 +1,369 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <misc/cxl.h> +#include <linux/fs.h> + +#include "cxl.h" + +struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) +{ + struct address_space *mapping; + struct cxl_afu *afu; + struct cxl_context *ctx; + int rc; + + afu = cxl_pci_to_afu(dev); + + get_device(&afu->dev); + ctx = cxl_context_alloc(); + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); + goto err_dev; + } + + ctx->kernelapi = true; + + /* + * Make our own address space since we won't have one from the + * filesystem like the user api has, and even if we do associate a file + * with this context we don't want to use the global anonymous inode's + * address space as that can invalidate unrelated users: + */ + mapping = kmalloc(sizeof(struct address_space), GFP_KERNEL); + if (!mapping) { + rc = -ENOMEM; + goto err_ctx; + } + address_space_init_once(mapping); + + /* Make it a slave context. We can promote it later? */ + rc = cxl_context_init(ctx, afu, false, mapping); + if (rc) + goto err_mapping; + + cxl_assign_psn_space(ctx); + + return ctx; + +err_mapping: + kfree(mapping); +err_ctx: + kfree(ctx); +err_dev: + put_device(&afu->dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(cxl_dev_context_init); + +struct cxl_context *cxl_get_context(struct pci_dev *dev) +{ + return dev->dev.archdata.cxl_ctx; +} +EXPORT_SYMBOL_GPL(cxl_get_context); + +struct device *cxl_get_phys_dev(struct pci_dev *dev) +{ + struct cxl_afu *afu; + + afu = cxl_pci_to_afu(dev); + + return afu->adapter->dev.parent; +} +EXPORT_SYMBOL_GPL(cxl_get_phys_dev); + +int cxl_release_context(struct cxl_context *ctx) +{ + if (ctx->status >= STARTED) + return -EBUSY; + + put_device(&ctx->afu->dev); + + cxl_context_free(ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_release_context); + +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + if (num == 0) + num = ctx->afu->pp_irqs; + return afu_allocate_irqs(ctx, num); +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + afu_irq_name_free(ctx); + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + +static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) +{ + __u16 range; + int r; + + WARN_ON(num == 0); + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + range = ctx->irqs.range[r]; + if (num < range) { + return ctx->irqs.offset[r] + num; + } + num -= range; + } + return 0; +} + +int cxl_map_afu_irq(struct cxl_context *ctx, int num, + irq_handler_t handler, void *cookie, char *name) +{ + irq_hw_number_t hwirq; + + /* + * Find interrupt we are to register. + */ + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return -ENOENT; + + return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name); +} +EXPORT_SYMBOL_GPL(cxl_map_afu_irq); + +void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return; + + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); +} +EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq); + +/* + * Start a context + * Code here similar to afu_ioctl_start_work(). + */ +int cxl_start_context(struct cxl_context *ctx, u64 wed, + struct task_struct *task) +{ + int rc = 0; + bool kernel = true; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + mutex_lock(&ctx->status_mutex); + if (ctx->status == STARTED) + goto out; /* already started */ + + if (task) { + ctx->pid = get_task_pid(task, PIDTYPE_PID); + get_pid(ctx->pid); + kernel = false; + } + + cxl_ctx_get(); + + if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) { + put_pid(ctx->pid); + cxl_ctx_put(); + goto out; + } + + ctx->status = STARTED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_start_context); + +int cxl_process_element(struct cxl_context *ctx) +{ + return ctx->pe; +} +EXPORT_SYMBOL_GPL(cxl_process_element); + +/* Stop a context. Returns 0 on success, otherwise -Errno */ +int cxl_stop_context(struct cxl_context *ctx) +{ + return __detach_context(ctx); +} +EXPORT_SYMBOL_GPL(cxl_stop_context); + +void cxl_set_master(struct cxl_context *ctx) +{ + ctx->master = true; + cxl_assign_psn_space(ctx); +} +EXPORT_SYMBOL_GPL(cxl_set_master); + +/* wrappers around afu_* file ops which are EXPORTED */ +int cxl_fd_open(struct inode *inode, struct file *file) +{ + return afu_open(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_open); +int cxl_fd_release(struct inode *inode, struct file *file) +{ + return afu_release(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_release); +long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} +EXPORT_SYMBOL_GPL(cxl_fd_ioctl); +int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm) +{ + return afu_mmap(file, vm); +} +EXPORT_SYMBOL_GPL(cxl_fd_mmap); +unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll) +{ + return afu_poll(file, poll); +} +EXPORT_SYMBOL_GPL(cxl_fd_poll); +ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + return afu_read(file, buf, count, off); +} +EXPORT_SYMBOL_GPL(cxl_fd_read); + +#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME + +/* Get a struct file and fd for a context and attach the ops */ +struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, + int *fd) +{ + struct file *file; + int rc, flags, fdtmp; + + flags = O_RDWR | O_CLOEXEC; + + /* This code is similar to anon_inode_getfd() */ + rc = get_unused_fd_flags(flags); + if (rc < 0) + return ERR_PTR(rc); + fdtmp = rc; + + /* + * Patch the file ops. Needs to be careful that this is rentrant safe. + */ + if (fops) { + PATCH_FOPS(open); + PATCH_FOPS(poll); + PATCH_FOPS(read); + PATCH_FOPS(release); + PATCH_FOPS(unlocked_ioctl); + PATCH_FOPS(compat_ioctl); + PATCH_FOPS(mmap); + } else /* use default ops */ + fops = (struct file_operations *)&afu_fops; + + file = anon_inode_getfile("cxl", fops, ctx, flags); + if (IS_ERR(file)) + goto err_fd; + + file->f_mapping = ctx->mapping; + + *fd = fdtmp; + return file; + +err_fd: + put_unused_fd(fdtmp); + return NULL; +} +EXPORT_SYMBOL_GPL(cxl_get_fd); + +struct cxl_context *cxl_fops_get_context(struct file *file) +{ + return file->private_data; +} +EXPORT_SYMBOL_GPL(cxl_fops_get_context); + +int cxl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work *work) +{ + int rc; + + /* code taken from afu_ioctl_start_work */ + if (!(work->flags & CXL_START_WORK_NUM_IRQS)) + work->num_interrupts = ctx->afu->pp_irqs; + else if ((work->num_interrupts < ctx->afu->pp_irqs) || + (work->num_interrupts > ctx->afu->irqs_max)) { + return -EINVAL; + } + + rc = afu_register_irqs(ctx, work->num_interrupts); + if (rc) + return rc; + + rc = cxl_start_context(ctx, work->work_element_descriptor, current); + if (rc < 0) { + afu_release_irqs(ctx, ctx); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_start_work); + +void __iomem *cxl_psa_map(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = cxl_afu_check_and_enable(afu); + if (rc) + return NULL; + + pr_devel("%s: psn_phys%llx size:%llx\n", + __func__, afu->psn_phys, afu->adapter->ps_size); + return ioremap(ctx->psn_phys, ctx->psn_size); +} +EXPORT_SYMBOL_GPL(cxl_psa_map); + +void cxl_psa_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL_GPL(cxl_psa_unmap); + +int cxl_afu_reset(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = __cxl_afu_reset(afu); + if (rc) + return rc; + + return cxl_afu_check_and_enable(afu); +} +EXPORT_SYMBOL_GPL(cxl_afu_reset); + +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image) +{ + afu->adapter->perst_same_image = perst_reloads_same_image; +} +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); diff --git a/kernel/drivers/misc/cxl/base.c b/kernel/drivers/misc/cxl/base.c index 0654ad836..a9f0dd325 100644 --- a/kernel/drivers/misc/cxl/base.c +++ b/kernel/drivers/misc/cxl/base.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/rcupdate.h> #include <asm/errno.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" /* protected by rcu */ diff --git a/kernel/drivers/misc/cxl/context.c b/kernel/drivers/misc/cxl/context.c index e4dc8cdf6..2faa1270d 100644 --- a/kernel/drivers/misc/cxl/context.c +++ b/kernel/drivers/misc/cxl/context.c @@ -126,6 +126,18 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->status != STARTED) { mutex_unlock(&ctx->status_mutex); pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } return VM_FAULT_SIGBUS; } @@ -145,8 +157,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = { */ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) { + u64 start = vma->vm_pgoff << PAGE_SHIFT; u64 len = vma->vm_end - vma->vm_start; - len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + } if (ctx->afu->current_mode != CXL_MODE_DEDICATED) { /* make sure there is a valid per process space for this AFU */ @@ -174,7 +194,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) * return until all outstanding interrupts for this context have completed. The * hardware should no longer access *ctx after this has returned. */ -static void __detach_context(struct cxl_context *ctx) +int __detach_context(struct cxl_context *ctx) { enum cxl_context_status status; @@ -183,12 +203,17 @@ static void __detach_context(struct cxl_context *ctx) ctx->status = CLOSED; mutex_unlock(&ctx->status_mutex); if (status != STARTED) - return; + return -EBUSY; - WARN_ON(cxl_detach_process(ctx)); - afu_release_irqs(ctx); + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_detach_process(ctx) && + cxl_adapter_link_ok(ctx->afu->adapter)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ - wake_up_all(&ctx->wq); + put_pid(ctx->pid); + cxl_ctx_put(); + return 0; } /* @@ -199,7 +224,14 @@ static void __detach_context(struct cxl_context *ctx) */ void cxl_context_detach(struct cxl_context *ctx) { - __detach_context(ctx); + int rc; + + rc = __detach_context(ctx); + if (rc) + return; + + afu_release_irqs(ctx, ctx); + wake_up_all(&ctx->wq); } /* @@ -216,7 +248,7 @@ void cxl_context_detach_all(struct cxl_afu *afu) * Anything done in here needs to be setup before the IDR is * created and torn down after the IDR removed */ - __detach_context(ctx); + cxl_context_detach(ctx); /* * We are force detaching - remove any active PSA mappings so @@ -232,16 +264,27 @@ void cxl_context_detach_all(struct cxl_afu *afu) mutex_unlock(&afu->contexts_lock); } -void cxl_context_free(struct cxl_context *ctx) +static void reclaim_ctx(struct rcu_head *rcu) { - mutex_lock(&ctx->afu->contexts_lock); - idr_remove(&ctx->afu->contexts_idr, ctx->pe); - mutex_unlock(&ctx->afu->contexts_lock); - synchronize_rcu(); + struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); ctx->sstp = NULL; + if (ctx->kernelapi) + kfree(ctx->mapping); + + if (ctx->irq_bitmap) + kfree(ctx->irq_bitmap); - put_pid(ctx->pid); kfree(ctx); } + +void cxl_context_free(struct cxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + idr_remove(&ctx->afu->contexts_idr, ctx->pe); + mutex_unlock(&ctx->afu->contexts_lock); + call_rcu(&ctx->rcu, reclaim_ctx); +} diff --git a/kernel/drivers/misc/cxl/cxl.h b/kernel/drivers/misc/cxl/cxl.h index a1cee4767..0cfb9c129 100644 --- a/kernel/drivers/misc/cxl/cxl.h +++ b/kernel/drivers/misc/cxl/cxl.h @@ -18,10 +18,11 @@ #include <linux/pid.h> #include <linux/io.h> #include <linux/pci.h> +#include <linux/fs.h> #include <asm/cputable.h> #include <asm/mmu.h> #include <asm/reg.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include <uapi/misc/cxl.h> @@ -33,7 +34,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. */ -#define CXL_API_VERSION 1 +#define CXL_API_VERSION 2 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -82,8 +83,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; /* 0x00C0:7EFF Implementation dependent area */ static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; +static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; +static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140}; static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; @@ -151,6 +154,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PSL_SPAP_Size_Shift 4 #define CXL_PSL_SPAP_V 0x0000000000000001ULL +/****** CXL_PSL_Control ****************************************************/ +#define CXL_PSL_Control_tb 0x0000000000000001ULL + /****** CXL_PSL_DLCNTL *****************************************************/ #define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) #define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) @@ -315,8 +321,6 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_MAX_SLICES 4 #define MAX_AFU_MMIO_REGS 3 -#define CXL_MODE_DEDICATED 0x1 -#define CXL_MODE_DIRECTED 0x2 #define CXL_MODE_TIME_SLICED 0x4 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) @@ -362,6 +366,10 @@ struct cxl_afu { struct mutex spa_mutex; spinlock_t afu_cntl_lock; + /* AFU error buffer fields and bin attribute for sysfs */ + u64 eb_len, eb_offset; + struct bin_attribute attr_eb; + /* * Only the first part of the SPA is used for the process element * linked list. The only other part that software needs to worry about @@ -375,6 +383,9 @@ struct cxl_afu { int spa_max_procs; unsigned int psl_virq; + /* pointer to the vphb */ + struct pci_controller *phb; + int pp_irqs; int irqs_max; int num_procs; @@ -412,6 +423,9 @@ struct cxl_context { /* Used to unmap any mmaps when force detaching */ struct address_space *mapping; struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; + bool kernelapi; spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; @@ -455,6 +469,8 @@ struct cxl_context { bool pending_irq; bool pending_fault; bool pending_afu_err; + + struct rcu_head rcu; }; struct cxl { @@ -485,6 +501,7 @@ struct cxl { bool user_image_loaded; bool perst_loads_image; bool perst_select_user; + bool perst_same_image; }; int cxl_alloc_one_irq(struct cxl *adapter); @@ -523,16 +540,33 @@ struct cxl_process_element { __be32 software_state; } __packed; +static inline bool cxl_adapter_link_ok(struct cxl *cxl) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); +} + static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); return cxl->p1_mmio + cxl_reg_off(reg); } -#define cxl_p1_write(cxl, reg, val) \ - out_be64(_cxl_p1_addr(cxl, reg), val) -#define cxl_p1_read(cxl, reg) \ - in_be64(_cxl_p1_addr(cxl, reg)) +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(cxl))) + out_be64(_cxl_p1_addr(cxl, reg), val); +} + +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(cxl))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) { @@ -540,29 +574,62 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg return afu->p1n_mmio + cxl_reg_off(reg); } -#define cxl_p1n_write(afu, reg, val) \ - out_be64(_cxl_p1n_addr(afu, reg), val) -#define cxl_p1n_read(afu, reg) \ - in_be64(_cxl_p1n_addr(afu, reg)) +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p1n_addr(afu, reg), val); +} + +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) { return afu->p2n_mmio + cxl_reg_off(reg); } -#define cxl_p2n_write(afu, reg, val) \ - out_be64(_cxl_p2n_addr(afu, reg), val) -#define cxl_p2n_read(afu, reg) \ - in_be64(_cxl_p2n_addr(afu, reg)) +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p2n_addr(afu, reg), val); +} + +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; +} +static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return ~0ULL; +} -#define cxl_afu_cr_read64(afu, cr, off) \ - in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) -#define cxl_afu_cr_read32(afu, cr, off) \ - in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return 0xffffffff; +} u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); +ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); @@ -574,6 +641,9 @@ void unregister_cxl_calls(struct cxl_calls *calls); int cxl_alloc_adapter_nr(struct cxl *adapter); void cxl_remove_adapter_nr(struct cxl *adapter); +int cxl_alloc_spa(struct cxl_afu *afu); +void cxl_release_spa(struct cxl_afu *afu); + int cxl_file_init(void); void cxl_file_exit(void); int cxl_register_adapter(struct cxl *adapter); @@ -606,7 +676,8 @@ void cxl_release_psl_err_irq(struct cxl *adapter); int cxl_register_serr_irq(struct cxl_afu *afu); void cxl_release_serr_irq(struct cxl_afu *afu); int afu_register_irqs(struct cxl_context *ctx, u32 count); -void afu_release_irqs(struct cxl_context *ctx); +void afu_release_irqs(struct cxl_context *ctx, void *cookie); +void afu_irq_name_free(struct cxl_context *ctx); irqreturn_t cxl_slice_irq_err(int irq, void *data); int cxl_debugfs_init(void); @@ -629,6 +700,10 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, struct address_space *mapping); void cxl_context_free(struct cxl_context *ctx); int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name); +void cxl_unmap_irq(unsigned int virq, void *cookie); +int __detach_context(struct cxl_context *ctx); /* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ struct cxl_irq_info { @@ -642,6 +717,7 @@ struct cxl_irq_info { u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ }; +void cxl_assign_psn_space(struct cxl_context *ctx); int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int cxl_detach_process(struct cxl_context *ctx); @@ -653,11 +729,24 @@ int cxl_check_error(struct cxl_afu *afu); int cxl_afu_slbia(struct cxl_afu *afu); int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); -int cxl_afu_reset(struct cxl_afu *afu); +int __cxl_afu_reset(struct cxl_afu *afu); +int cxl_afu_check_and_enable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); +int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu); +void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; +int afu_allocate_irqs(struct cxl_context *ctx, u32 count); + +int afu_open(struct inode *inode, struct file *file); +int afu_release(struct inode *inode, struct file *file); +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int afu_mmap(struct file *file, struct vm_area_struct *vm); +unsigned int afu_poll(struct file *file, struct poll_table_struct *poll); +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); +extern const struct file_operations afu_fops; #endif diff --git a/kernel/drivers/misc/cxl/debugfs.c b/kernel/drivers/misc/cxl/debugfs.c index 825c41258..18df6f44a 100644 --- a/kernel/drivers/misc/cxl/debugfs.c +++ b/kernel/drivers/misc/cxl/debugfs.c @@ -48,7 +48,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, struct dentry *parent, u64 __iomem *value) { - return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64); + return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64); } int cxl_debugfs_adapter_add(struct cxl *adapter) diff --git a/kernel/drivers/misc/cxl/fault.c b/kernel/drivers/misc/cxl/fault.c index 5286b8b70..25a5418c5 100644 --- a/kernel/drivers/misc/cxl/fault.c +++ b/kernel/drivers/misc/cxl/fault.c @@ -172,8 +172,8 @@ void cxl_handle_fault(struct work_struct *fault_work) container_of(fault_work, struct cxl_context, fault_work); u64 dsisr = ctx->dsisr; u64 dar = ctx->dar; - struct task_struct *task; - struct mm_struct *mm; + struct task_struct *task = NULL; + struct mm_struct *mm = NULL; if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || @@ -194,17 +194,19 @@ void cxl_handle_fault(struct work_struct *fault_work) pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); - if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { - pr_devel("cxl_handle_fault unable to get task %i\n", - pid_nr(ctx->pid)); - cxl_ack_ae(ctx); - return; - } - if (!(mm = get_task_mm(task))) { - pr_devel("cxl_handle_fault unable to get mm %i\n", - pid_nr(ctx->pid)); - cxl_ack_ae(ctx); - goto out; + if (!ctx->kernel) { + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_handle_fault unable to get task %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_handle_fault unable to get mm %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + goto out; + } } if (dsisr & CXL_PSL_DSISR_An_DS) @@ -214,9 +216,11 @@ void cxl_handle_fault(struct work_struct *fault_work) else WARN(1, "cxl_handle_fault has nothing to handle\n"); - mmput(mm); + if (mm) + mmput(mm); out: - put_task_struct(task); + if (task) + put_task_struct(task); } static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) diff --git a/kernel/drivers/misc/cxl/file.c b/kernel/drivers/misc/cxl/file.c index 2364bcadb..7ccd2998b 100644 --- a/kernel/drivers/misc/cxl/file.c +++ b/kernel/drivers/misc/cxl/file.c @@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; + if (!cxl_adapter_link_ok(adapter)) { + rc = -EIO; + goto err_put_afu; + } + if (!(ctx = cxl_context_alloc())) { rc = -ENOMEM; goto err_put_afu; @@ -96,7 +101,8 @@ err_put_adapter: put_device(&adapter->dev); return rc; } -static int afu_open(struct inode *inode, struct file *file) + +int afu_open(struct inode *inode, struct file *file) { return __afu_open(inode, file, false); } @@ -106,7 +112,7 @@ static int afu_master_open(struct inode *inode, struct file *file) return __afu_open(inode, file, true); } -static int afu_release(struct inode *inode, struct file *file) +int afu_release(struct inode *inode, struct file *file) { struct cxl_context *ctx = file->private_data; @@ -114,9 +120,16 @@ static int afu_release(struct inode *inode, struct file *file) __func__, ctx->pe); cxl_context_detach(ctx); - mutex_lock(&ctx->mapping_lock); - ctx->mapping = NULL; - mutex_unlock(&ctx->mapping_lock); + + /* + * Delete the context's mapping pointer, unless it's created by the + * kernel API, in which case leave it so it can be freed by reclaim_ctx() + */ + if (!ctx->kernelapi) { + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + } put_device(&ctx->afu->dev); @@ -128,7 +141,6 @@ static int afu_release(struct inode *inode, struct file *file) */ cxl_context_free(ctx); - cxl_ctx_put(); return 0; } @@ -179,6 +191,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + /* * We grab the PID here and not in the file open to allow for the case * where a process (master, some daemon, etc) has opened the chardev on @@ -191,7 +205,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, amr))) { - afu_release_irqs(ctx); + afu_release_irqs(ctx, ctx); goto out; } @@ -212,19 +226,44 @@ static long afu_ioctl_process_element(struct cxl_context *ctx, return 0; } -static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long afu_ioctl_get_afu_id(struct cxl_context *ctx, + struct cxl_afu_id __user *upafuid) +{ + struct cxl_afu_id afuid = { 0 }; + + afuid.card_id = ctx->afu->adapter->adapter_num; + afuid.afu_offset = ctx->afu->slice; + afuid.afu_mode = ctx->afu->current_mode; + + /* set the flag bit in case the afu is a slave */ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master) + afuid.flags |= CXL_AFUID_FLAG_SLAVE; + + if (copy_to_user(upafuid, &afuid, sizeof(afuid))) + return -EFAULT; + + return 0; +} + +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct cxl_context *ctx = file->private_data; if (ctx->status == CLOSED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + pr_devel("afu_ioctl\n"); switch (cmd) { case CXL_IOCTL_START_WORK: return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); case CXL_IOCTL_GET_PROCESS_ELEMENT: return afu_ioctl_process_element(ctx, (__u32 __user *)arg); + case CXL_IOCTL_GET_AFU_ID: + return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *) + arg); } return -EINVAL; } @@ -235,7 +274,7 @@ static long afu_compat_ioctl(struct file *file, unsigned int cmd, return afu_ioctl(file, cmd, arg); } -static int afu_mmap(struct file *file, struct vm_area_struct *vm) +int afu_mmap(struct file *file, struct vm_area_struct *vm) { struct cxl_context *ctx = file->private_data; @@ -243,10 +282,13 @@ static int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + return cxl_context_iomap(ctx, vm); } -static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) +unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) { struct cxl_context *ctx = file->private_data; int mask = 0; @@ -278,7 +320,7 @@ static inline int ctx_event_pending(struct cxl_context *ctx) ctx->pending_afu_err || (ctx->status == CLOSED)); } -static ssize_t afu_read(struct file *file, char __user *buf, size_t count, +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct cxl_context *ctx = file->private_data; @@ -287,6 +329,9 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + if (count < CXL_READ_MIN_SIZE) return -EINVAL; @@ -297,6 +342,11 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + rc = -EIO; + goto out; + } + if (file->f_flags & O_NONBLOCK) { rc = -EAGAIN; goto out; @@ -359,7 +409,11 @@ out: return rc; } -static const struct file_operations afu_fops = { +/* + * Note: if this is updated, we need to update api.c to patch the new ones in + * too + */ +const struct file_operations afu_fops = { .owner = THIS_MODULE, .open = afu_open, .poll = afu_poll, @@ -493,7 +547,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(CXL_API_VERSION != 2); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/kernel/drivers/misc/cxl/irq.c b/kernel/drivers/misc/cxl/irq.c index c8929c526..09a406058 100644 --- a/kernel/drivers/misc/cxl/irq.c +++ b/kernel/drivers/misc/cxl/irq.c @@ -14,7 +14,7 @@ #include <linux/slab.h> #include <linux/pid.h> #include <asm/cputable.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" @@ -30,12 +30,12 @@ static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u6 serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%.16llx\n", fir1); - dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%.16llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); - dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); - dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); cxl_stop_trace(ctx->afu->adapter); @@ -54,10 +54,10 @@ irqreturn_t cxl_slice_irq_err(int irq, void *data) fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); - dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice); - dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%.16llx\n", errstat); - dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); @@ -72,7 +72,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data) WARN(1, "CXL ERROR interrupt %i\n", irq); err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); - dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%.16llx\n", err_ivte); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); cxl_stop_trace(adapter); @@ -80,7 +80,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data) fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); - dev_crit(&adapter->dev, "PSL_FIR1: 0x%.16llx\nPSL_FIR2: 0x%.16llx\n", fir1, fir2); + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); return IRQ_HANDLED; } @@ -147,7 +147,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) if (dsisr & CXL_PSL_DSISR_An_PE) return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err); + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); if (ctx->pending_afu_err) { /* @@ -158,7 +158,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) * probably best that we log them somewhere: */ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " - "undelivered to pe %i: %.llx\n", + "undelivered to pe %i: 0x%016llx\n", ctx->pe, irq_info->afu_err); } else { spin_lock(&ctx->lock); @@ -211,8 +211,8 @@ static irqreturn_t cxl_irq_multiplexed(int irq, void *data) } rcu_read_unlock(); - WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR" - " %.16llx\n(Possible AFU HW issue - was a term/remove acked" + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" " with outstanding transactions?)\n", ph, irq_info.dsisr, irq_info.dar); return fail_psl_irq(afu, &irq_info); @@ -341,6 +341,9 @@ int cxl_register_psl_err_irq(struct cxl *adapter) void cxl_release_psl_err_irq(struct cxl *adapter) { + if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq)) + return; + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); @@ -374,6 +377,9 @@ int cxl_register_serr_irq(struct cxl_afu *afu) void cxl_release_serr_irq(struct cxl_afu *afu) { + if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); @@ -400,6 +406,9 @@ int cxl_register_psl_irq(struct cxl_afu *afu) void cxl_release_psl_irq(struct cxl_afu *afu) { + if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq)) + return; + cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); kfree(afu->psl_irq_name); @@ -416,12 +425,14 @@ void afu_irq_name_free(struct cxl_context *ctx) } } -int afu_register_irqs(struct cxl_context *ctx, u32 count) +int afu_allocate_irqs(struct cxl_context *ctx, u32 count) { - irq_hw_number_t hwirq; int rc, r, i, j = 1; struct cxl_irq_name *irq_name; + /* Initialize the list head to hold irq names */ + INIT_LIST_HEAD(&ctx->irq_names); + if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -433,13 +444,12 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) - return -ENOMEM; + goto out; /* * Allocate names first. If any fail, bail out before allocating * actual hardware IRQs. */ - INIT_LIST_HEAD(&ctx->irq_names); for (r = 1; r < CXL_IRQ_RANGES; r++) { for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), @@ -458,6 +468,19 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) j++; } } + return 0; + +out: + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + afu_irq_name_free(ctx); + return -ENOMEM; +} + +static void afu_register_hwirqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + struct cxl_irq_name *irq_name; + int r,i; /* We've allocated all memory now, so let's do the irq allocations */ irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); @@ -469,15 +492,21 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) irq_name = list_next_entry(irq_name, list); } } +} - return 0; +int afu_register_irqs(struct cxl_context *ctx, u32 count) +{ + int rc; -out: - afu_irq_name_free(ctx); - return -ENOMEM; -} + rc = afu_allocate_irqs(ctx, count); + if (rc) + return rc; + + afu_register_hwirqs(ctx); + return 0; + } -void afu_release_irqs(struct cxl_context *ctx) +void afu_release_irqs(struct cxl_context *ctx, void *cookie) { irq_hw_number_t hwirq; unsigned int virq; @@ -488,10 +517,12 @@ void afu_release_irqs(struct cxl_context *ctx) for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { virq = irq_find_mapping(NULL, hwirq); if (virq) - cxl_unmap_irq(virq, ctx); + cxl_unmap_irq(virq, cookie); } } afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + + ctx->irq_count = 0; } diff --git a/kernel/drivers/misc/cxl/main.c b/kernel/drivers/misc/cxl/main.c index de350dd46..9fde75ed4 100644 --- a/kernel/drivers/misc/cxl/main.c +++ b/kernel/drivers/misc/cxl/main.c @@ -20,7 +20,7 @@ #include <linux/idr.h> #include <linux/pci.h> #include <asm/cputable.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" @@ -222,6 +222,7 @@ static void exit_cxl(void) cxl_debugfs_exit(); cxl_file_exit(); unregister_cxl_calls(&cxl_calls); + idr_destroy(&cxl_adapter_idr); } module_init(init_cxl); diff --git a/kernel/drivers/misc/cxl/native.c b/kernel/drivers/misc/cxl/native.c index 29185fc61..f40909793 100644 --- a/kernel/drivers/misc/cxl/native.c +++ b/kernel/drivers/misc/cxl/native.c @@ -15,7 +15,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/synch.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" @@ -41,7 +41,14 @@ static int afu_control(struct cxl_afu *afu, u64 command, rc = -EBUSY; goto out; } - pr_devel_ratelimited("AFU control... (0x%.16llx)\n", + + if (!cxl_adapter_link_ok(afu->adapter)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + + pr_devel_ratelimited("AFU control... (0x%016llx)\n", AFU_Cntl | command); cpu_relax(); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); @@ -73,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu) } /* This will disable as well as reset */ -int cxl_afu_reset(struct cxl_afu *afu) +int __cxl_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); @@ -83,8 +90,12 @@ int cxl_afu_reset(struct cxl_afu *afu) false); } -static int afu_check_and_enable(struct cxl_afu *afu) +int cxl_afu_check_and_enable(struct cxl_afu *afu) { + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } if (afu->enabled) return 0; return afu_enable(afu); @@ -103,6 +114,12 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { WARN(1, "psl_purge request while AFU not disabled!\n"); cxl_afu_disable(afu); @@ -119,14 +136,19 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(afu->adapter)) { + rc = -EIO; + goto out; + } + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); - pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr); + pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr); if (dsisr & CXL_PSL_DSISR_TRANS) { dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); - dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar); + dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", dsisr, dar); cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); } else if (dsisr) { - dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr); + dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", dsisr); cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); } else { cpu_relax(); @@ -161,10 +183,8 @@ static int spa_max_procs(int spa_size) return ((spa_size / 8) - 96) / 17; } -static int alloc_spa(struct cxl_afu *afu) +int cxl_alloc_spa(struct cxl_afu *afu) { - u64 spap; - /* Work out how many pages to allocate */ afu->spa_order = 0; do { @@ -183,6 +203,13 @@ static int alloc_spa(struct cxl_afu *afu) pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs); + return 0; +} + +static void attach_spa(struct cxl_afu *afu) +{ + u64 spap; + afu->sw_command_status = (__be64 *)((char *)afu->spa + ((afu->spa_max_procs + 3) * 128)); @@ -191,14 +218,19 @@ static int alloc_spa(struct cxl_afu *afu) spap |= CXL_PSL_SPAP_V; pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); - - return 0; } -static void release_spa(struct cxl_afu *afu) +static inline void detach_spa(struct cxl_afu *afu) { cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); - free_pages((unsigned long) afu->spa, afu->spa_order); +} + +void cxl_release_spa(struct cxl_afu *afu) +{ + if (afu->spa) { + free_pages((unsigned long) afu->spa, afu->spa_order); + afu->spa = NULL; + } } int cxl_tlb_slb_invalidate(struct cxl *adapter) @@ -215,6 +247,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } @@ -224,6 +258,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } return 0; @@ -240,6 +276,11 @@ int cxl_afu_slbia(struct cxl_afu *afu) dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); return -EBUSY; } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; cpu_relax(); } return 0; @@ -279,6 +320,8 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { + if (!cxl_adapter_link_ok(adapter)) + break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) break; @@ -308,6 +351,11 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); @@ -355,8 +403,13 @@ static int terminate_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, - CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); mutex_unlock(&ctx->afu->spa_mutex); @@ -369,7 +422,14 @@ static int remove_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); - if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0))) + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); @@ -379,7 +439,7 @@ static int remove_process_element(struct cxl_context *ctx) } -static void assign_psn_space(struct cxl_context *ctx) +void cxl_assign_psn_space(struct cxl_context *ctx) { if (!ctx->afu->pp_size || ctx->master) { ctx->psn_phys = ctx->afu->psn_phys; @@ -397,15 +457,18 @@ static int activate_afu_directed(struct cxl_afu *afu) dev_info(&afu->dev, "Activating AFU directed mode\n"); - if (alloc_spa(afu)) - return -ENOMEM; + afu->num_procs = afu->max_procs_virtualised; + if (afu->spa == NULL) { + if (cxl_alloc_spa(afu)) + return -ENOMEM; + } + attach_spa(afu); cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); afu->current_mode = CXL_MODE_DIRECTED; - afu->num_procs = afu->max_procs_virtualised; if ((rc = cxl_chardev_m_afu_add(afu))) return rc; @@ -430,34 +493,46 @@ err: #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) #endif +static u64 calculate_sr(struct cxl_context *ctx) +{ + u64 sr = 0; + + set_endian(sr); + if (ctx->master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + if (ctx->kernel) { + sr |= CXL_PSL_SR_An_R | (mfmsr() & MSR_SF); + sr |= CXL_PSL_SR_An_HV; + } else { + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + sr &= ~(CXL_PSL_SR_An_HV); + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + } + return sr; +} + static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) { - u64 sr; + u32 pid; int r, result; - assign_psn_space(ctx); + cxl_assign_psn_space(ctx); ctx->elem->ctxtime = 0; /* disable */ ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); ctx->elem->haurp = 0; /* disable */ ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1)); - sr = 0; - if (ctx->master) - sr |= CXL_PSL_SR_An_MP; - if (mfspr(SPRN_LPCR) & LPCR_TC) - sr |= CXL_PSL_SR_An_TC; - /* HV=0, PR=1, R=1 for userspace - * For kernel contexts: this would need to change - */ - sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; - set_endian(sr); - sr &= ~(CXL_PSL_SR_An_HV); - if (!test_tsk_thread_flag(current, TIF_32BIT)) - sr |= CXL_PSL_SR_An_SF; - ctx->elem->common.pid = cpu_to_be32(current->pid); + pid = current->pid; + if (ctx->kernel) + pid = 0; ctx->elem->common.tid = 0; - ctx->elem->sr = cpu_to_be64(sr); + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); ctx->elem->common.csrp = 0; /* disable */ ctx->elem->common.aurp0 = 0; /* disable */ @@ -477,12 +552,10 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->elem->common.wed = cpu_to_be64(wed); /* first guy needs to enable */ - if ((result = afu_check_and_enable(ctx->afu))) + if ((result = cxl_afu_check_and_enable(ctx->afu))) return result; - add_process_element(ctx); - - return 0; + return add_process_element(ctx); } static int deactivate_afu_directed(struct cxl_afu *afu) @@ -495,12 +568,10 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - cxl_afu_reset(afu); + __cxl_afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); - release_spa(afu); - return 0; } @@ -530,20 +601,15 @@ static int activate_dedicated_process(struct cxl_afu *afu) static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) { struct cxl_afu *afu = ctx->afu; - u64 sr; + u64 pid; int rc; - sr = 0; - set_endian(sr); - if (ctx->master) - sr |= CXL_PSL_SR_An_MP; - if (mfspr(SPRN_LPCR) & LPCR_TC) - sr |= CXL_PSL_SR_An_TC; - sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; - if (!test_tsk_thread_flag(current, TIF_32BIT)) - sr |= CXL_PSL_SR_An_SF; - cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32); - cxl_p1n_write(afu, CXL_PSL_SR_An, sr); + pid = (u64)current->pid << 32; + if (ctx->kernel) + pid = 0; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid); + + cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx)); if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) return rc; @@ -564,9 +630,9 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); /* master only context for dedicated */ - assign_psn_space(ctx); + cxl_assign_psn_space(ctx); - if ((rc = cxl_afu_reset(afu))) + if ((rc = __cxl_afu_reset(afu))) return rc; cxl_p2n_write(afu, CXL_PSL_WED_An, wed); @@ -607,6 +673,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) if (!(mode & afu->modes_supported)) return -EINVAL; + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + if (mode == CXL_MODE_DIRECTED) return activate_afu_directed(afu); if (mode == CXL_MODE_DEDICATED) @@ -617,6 +688,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) { + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); @@ -629,7 +705,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) static inline int detach_process_native_dedicated(struct cxl_context *ctx) { - cxl_afu_reset(ctx->afu); + __cxl_afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); return 0; @@ -661,6 +737,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); @@ -677,7 +759,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat) { u64 dsisr; - pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat); + pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat); /* Clear PSL_DSISR[PE] */ dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); diff --git a/kernel/drivers/misc/cxl/pci.c b/kernel/drivers/misc/cxl/pci.c index 4f1b0bdb9..be2c8e248 100644 --- a/kernel/drivers/misc/cxl/pci.c +++ b/kernel/drivers/misc/cxl/pci.c @@ -24,6 +24,7 @@ #include <asm/io.h> #include "cxl.h" +#include <misc/cxl.h> #define CXL_PCI_VSEC_ID 0x1280 @@ -90,6 +91,7 @@ /* This works a little different than the p1/p2 register accesses to make it * easier to pull out individual fields */ #define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off) #define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) #define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) @@ -132,7 +134,7 @@ u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) return (val >> ((off & 0x3) * 8)) & 0xff; } -static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = { +static const struct pci_device_id cxl_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, @@ -204,7 +206,7 @@ static void dump_cxl_config_space(struct pci_dev *dev) dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", p1_base(dev), p1_size(dev)); dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", - p1_base(dev), p2_size(dev)); + p2_base(dev), p2_size(dev)); dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", pci_resource_start(dev, 4), pci_resource_len(dev, 4)); @@ -286,7 +288,8 @@ static void dump_cxl_config_space(struct pci_dev *dev) static void dump_afu_descriptor(struct cxl_afu *afu) { - u64 val; + u64 val, afu_cr_num, afu_cr_off, afu_cr_len; + int i; #define show_reg(name, what) \ dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) @@ -296,6 +299,7 @@ static void dump_afu_descriptor(struct cxl_afu *afu) show_reg("num_of_processes", AFUD_NUM_PROCS(val)); show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); show_reg("req_prog_mode", val & 0xffffULL); + afu_cr_num = AFUD_NUM_CRS(val); val = AFUD_READ(afu, 0x8); show_reg("Reserved", val); @@ -307,8 +311,10 @@ static void dump_afu_descriptor(struct cxl_afu *afu) val = AFUD_READ_CR(afu); show_reg("Reserved", (val >> (63-7)) & 0xff); show_reg("AFU_CR_len", AFUD_CR_LEN(val)); + afu_cr_len = AFUD_CR_LEN(val) * 256; val = AFUD_READ_CR_OFF(afu); + afu_cr_off = val; show_reg("AFU_CR_offset", val); val = AFUD_READ_PPPSA(afu); @@ -325,6 +331,11 @@ static void dump_afu_descriptor(struct cxl_afu *afu) val = AFUD_READ_EB_OFF(afu); show_reg("AFU_EB_offset", val); + for (i = 0; i < afu_cr_num; i++) { + val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len); + show_reg("CR Vendor", val & 0xffff); + show_reg("CR Device", (val >> 16) & 0xffff); + } #undef show_reg } @@ -359,6 +370,55 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev return 0; } +#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) +#define _2048_250MHZ_CYCLES 1 + +static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_tb; + int delta; + unsigned int retry = 0; + struct device_node *np; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENODEV; + + /* Do not fail when CAPP timebase sync is not supported by OPAL */ + of_node_get(np); + if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) { + of_node_put(np); + pr_err("PSL: Timebase sync: OPAL support missing\n"); + return 0; + } + of_node_put(np); + + /* + * Setup PSL Timebase Control and Status register + * with the recommended Timebase Sync Count value + */ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * _2048_250MHZ_CYCLES)); + + /* Enable PSL Timebase */ + cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); + cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); + + /* Wait until CORE TB and PSL TB difference <= 16usecs */ + do { + msleep(1); + if (retry++ > 5) { + pr_err("PSL: Timebase sync: giving up!\n"); + return -EIO; + } + psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase); + delta = mftb() - psl_tb; + if (delta < 0) + delta = -delta; + } while (tb_to_ns(delta) > 16000); + + return 0; +} + static int init_implementation_afu_regs(struct cxl_afu *afu) { /* read/write masks for this slice */ @@ -529,10 +589,18 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p1n_mmio) + if (afu->p2n_mmio) { iounmap(afu->p2n_mmio); - if (afu->p1n_mmio) + afu->p2n_mmio = NULL; + } + if (afu->p1n_mmio) { iounmap(afu->p1n_mmio); + afu->p1n_mmio = NULL; + } + if (afu->afu_desc_mmio) { + iounmap(afu->afu_desc_mmio); + afu->afu_desc_mmio = NULL; + } } static void cxl_release_afu(struct device *dev) @@ -541,6 +609,9 @@ static void cxl_release_afu(struct device *dev) pr_devel("cxl_release_afu\n"); + idr_destroy(&afu->contexts_idr); + cxl_release_spa(afu); + kfree(afu); } @@ -593,6 +664,22 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) afu->crs_len = AFUD_CR_LEN(val) * 256; afu->crs_offset = AFUD_READ_CR_OFF(afu); + + /* eb_len is in multiple of 4K */ + afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096; + afu->eb_offset = AFUD_READ_EB_OFF(afu); + + /* eb_off is 4K aligned so lower 12 bits are always zero */ + if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) { + dev_warn(&afu->dev, + "Invalid AFU error buffer offset %Lx\n", + afu->eb_offset); + dev_info(&afu->dev, + "Ignoring AFU error buffer in the descriptor\n"); + /* indicate that no afu buffer exists */ + afu->eb_len = 0; + } + return 0; } @@ -630,8 +717,8 @@ static int sanitise_afu_regs(struct cxl_afu *afu) */ reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { - dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg); - if (cxl_afu_reset(afu)) + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); + if (__cxl_afu_reset(afu)) return -EIO; if (cxl_afu_disable(afu)) return -EIO; @@ -651,7 +738,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); if (reg) { - dev_warn(&afu->dev, "AFU had pending DSISR: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); if (reg & CXL_PSL_DSISR_TRANS) cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); else @@ -660,57 +747,126 @@ static int sanitise_afu_regs(struct cxl_afu *afu) reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); if (reg) { if (reg & ~0xffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); } reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); if (reg) { - dev_warn(&afu->dev, "AFU had pending error status: %#.16llx\n", reg); + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); } return 0; } -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +/* + * afu_eb_read: + * Called from sysfs and reads the afu error info buffer. The h/w only supports + * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte + * aligned the function uses a bounce buffer which can be max PAGE_SIZE. + */ +ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) { - struct cxl_afu *afu; - bool free = true; - int rc; + loff_t aligned_start, aligned_end; + size_t aligned_length; + void *tbuf; + const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset; + + if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) + return 0; + + /* calculate aligned read window */ + count = min((size_t)(afu->eb_len - off), count); + aligned_start = round_down(off, 8); + aligned_end = round_up(off + count, 8); + aligned_length = aligned_end - aligned_start; + + /* max we can copy in one read is PAGE_SIZE */ + if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) { + aligned_length = ERR_BUFF_MAX_COPY_SIZE; + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + } - if (!(afu = cxl_alloc_afu(adapter, slice))) + /* use bounce buffer for copy */ + tbuf = (void *)__get_free_page(GFP_TEMPORARY); + if (!tbuf) return -ENOMEM; - if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice))) - goto err1; + /* perform aligned read from the mmio region */ + memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length); + memcpy(buf, tbuf + (off & 0x7), count); + + free_page((unsigned long)tbuf); + + return count; +} + +static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + int rc; if ((rc = cxl_map_slice_regs(afu, adapter, dev))) - goto err1; + return rc; if ((rc = sanitise_afu_regs(afu))) - goto err2; + goto err1; /* We need to reset the AFU before we can read the AFU descriptor */ - if ((rc = cxl_afu_reset(afu))) - goto err2; + if ((rc = __cxl_afu_reset(afu))) + goto err1; if (cxl_verbose) dump_afu_descriptor(afu); if ((rc = cxl_read_afu_descriptor(afu))) - goto err2; + goto err1; if ((rc = cxl_afu_descriptor_looks_ok(afu))) - goto err2; + goto err1; if ((rc = init_implementation_afu_regs(afu))) - goto err2; + goto err1; if ((rc = cxl_register_serr_irq(afu))) - goto err2; + goto err1; if ((rc = cxl_register_psl_irq(afu))) - goto err3; + goto err2; + + return 0; + +err2: + cxl_release_serr_irq(afu); +err1: + cxl_unmap_slice_regs(afu); + return rc; +} + +static void cxl_deconfigure_afu(struct cxl_afu *afu) +{ + cxl_release_psl_irq(afu); + cxl_release_serr_irq(afu); + cxl_unmap_slice_regs(afu); +} + +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + int rc; + + afu = cxl_alloc_afu(adapter, slice); + if (!afu) + return -ENOMEM; + + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); + if (rc) + goto err_free; + + rc = cxl_configure_afu(afu, adapter, dev); + if (rc) + goto err_free; /* Don't care if this fails */ cxl_debugfs_afu_add(afu); @@ -725,29 +881,23 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) if ((rc = cxl_sysfs_afu_add(afu))) goto err_put1; - - if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; - adapter->afu[afu->slice] = afu; + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + return 0; -err_put2: - cxl_sysfs_afu_remove(afu); err_put1: - device_unregister(&afu->dev); - free = false; + cxl_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - cxl_release_psl_irq(afu); -err3: - cxl_release_serr_irq(afu); -err2: - cxl_unmap_slice_regs(afu); -err1: - if (free) - kfree(afu); + device_unregister(&afu->dev); return rc; + +err_free: + kfree(afu); + return rc; + } static void cxl_remove_afu(struct cxl_afu *afu) @@ -767,10 +917,7 @@ static void cxl_remove_afu(struct cxl_afu *afu) cxl_context_detach_all(afu); cxl_afu_deactivate_mode(afu); - cxl_release_psl_irq(afu); - cxl_release_serr_irq(afu); - cxl_unmap_slice_regs(afu); - + cxl_deconfigure_afu(afu); device_unregister(&afu->dev); } @@ -779,6 +926,12 @@ int cxl_reset(struct cxl *adapter) struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; + if (adapter->perst_same_image) { + dev_warn(&dev->dev, + "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); + return -EINVAL; + } + dev_info(&dev->dev, "CXL reset\n"); /* pcie_warm_reset requests a fundamental pci reset which includes a @@ -799,7 +952,7 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) if (pci_request_region(dev, 0, "priv 1 regs")) goto err2; - pr_devel("cxl_map_adapter_regs: p1: %#.16llx %#llx, p2: %#.16llx %#llx", + pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) @@ -823,10 +976,16 @@ err1: static void cxl_unmap_adapter_regs(struct cxl *adapter) { - if (adapter->p1_mmio) + if (adapter->p1_mmio) { iounmap(adapter->p1_mmio); - if (adapter->p2_mmio) + adapter->p1_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 2); + } + if (adapter->p2_mmio) { iounmap(adapter->p2_mmio); + adapter->p2_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 0); + } } static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) @@ -838,13 +997,13 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) u8 image_state; if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n"); + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); return -ENODEV; } CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); if (vseclen < CXL_VSEC_MIN_SIZE) { - pr_err("ABORTING: CXL VSEC too short\n"); + dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n"); return -EINVAL; } @@ -855,7 +1014,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_loads_image = true; adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); @@ -877,30 +1035,56 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) return 0; } +/* + * Workaround a PCIe Host Bridge defect on some cards, that can cause + * malformed Transaction Layer Packet (TLP) errors to be erroneously + * reported. Mask this error in the Uncorrectable Error Mask Register. + * + * The upper nibble of the PSL revision is used to distinguish between + * different cards. The affected ones have it set to 0. + */ +static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev) +{ + int aer; + u32 data; + + if (adapter->psl_rev & 0xf000) + return; + if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))) + return; + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data); + if (data & PCI_ERR_UNC_MALF_TLP) + if (data & PCI_ERR_UNC_INTN) + return; + data |= PCI_ERR_UNC_MALF_TLP; + data |= PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data); +} + static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) { if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) return -EBUSY; if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { - dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n"); + dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n"); return -EINVAL; } if (!adapter->slices) { /* Once we support dynamic reprogramming we can use the card if * it supports loadable AFUs */ - dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n"); + dev_err(&dev->dev, "ABORTING: Device has no AFUs\n"); return -EINVAL; } if (!adapter->afu_desc_off || !adapter->afu_desc_size) { - dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); return -EINVAL; } if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { - dev_err(&adapter->dev, "ABORTING: Problem state size larger than " + dev_err(&dev->dev, "ABORTING: Problem state size larger than " "available in BAR2: 0x%llx > 0x%llx\n", adapter->ps_size, p2_size(dev) - adapter->ps_off); return -EINVAL; @@ -915,78 +1099,140 @@ static void cxl_release_adapter(struct device *dev) pr_devel("cxl_release_adapter\n"); + cxl_remove_adapter_nr(adapter); + kfree(adapter); } -static struct cxl *cxl_alloc_adapter(struct pci_dev *dev) +static struct cxl *cxl_alloc_adapter(void) { struct cxl *adapter; if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) return NULL; - adapter->dev.parent = &dev->dev; - adapter->dev.release = cxl_release_adapter; - pci_set_drvdata(dev, adapter); spin_lock_init(&adapter->afu_list_lock); + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + return adapter; + +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; } +#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) + static int sanitise_adapter_regs(struct cxl *adapter) { - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + /* Clear PSL tberror bit by writing 1 to it */ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror); return cxl_tlb_slb_invalidate(adapter); } -static struct cxl *cxl_init_adapter(struct pci_dev *dev) +/* This should contain *only* operations that can safely be done in + * both creation and recovery. + */ +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) { - struct cxl *adapter; - bool free = true; int rc; + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); - if (!(adapter = cxl_alloc_adapter(dev))) - return ERR_PTR(-ENOMEM); + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } - if ((rc = switch_card_to_cxl(dev))) - goto err1; + if ((rc = cxl_read_vsec(adapter, dev))) + return rc; - if ((rc = cxl_alloc_adapter_nr(adapter))) - goto err1; + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + return rc; - if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) - goto err2; + cxl_fixup_malformed_tlp(adapter, dev); - if ((rc = cxl_read_vsec(adapter, dev))) - goto err2; + if ((rc = setup_cxl_bars(dev))) + return rc; - if ((rc = cxl_vsec_looks_ok(adapter, dev))) - goto err2; + if ((rc = switch_card_to_cxl(dev))) + return rc; if ((rc = cxl_update_image_control(adapter))) - goto err2; + return rc; if ((rc = cxl_map_adapter_regs(adapter, dev))) - goto err2; + return rc; if ((rc = sanitise_adapter_regs(adapter))) - goto err2; + goto err; if ((rc = init_implementation_adapter_regs(adapter, dev))) - goto err3; + goto err; if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) - goto err3; + goto err; /* If recovery happened, the last step is to turn on snooping. * In the non-recovery case this has no effect */ - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) { - goto err3; - } + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) + goto err; + + if ((rc = cxl_setup_psl_timebase(adapter, dev))) + goto err; if ((rc = cxl_register_psl_err_irq(adapter))) - goto err3; + goto err; + + return 0; + +err: + cxl_unmap_adapter_regs(adapter); + return rc; + +} + +static void cxl_deconfigure_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + cxl_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + + pci_disable_device(pdev); +} + +static struct cxl *cxl_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + int rc; + + adapter = cxl_alloc_adapter(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + /* Set defaults for parameters which need to persist over + * configure/reconfigure + */ + adapter->perst_loads_image = true; + adapter->perst_same_image = false; + + rc = cxl_configure_adapter(adapter, dev); + if (rc) { + pci_disable_device(dev); + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); + } /* Don't care if this one fails: */ cxl_debugfs_adapter_add(adapter); @@ -1004,37 +1250,25 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) return adapter; err_put1: - device_unregister(&adapter->dev); - free = false; + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); -err3: - cxl_unmap_adapter_regs(adapter); -err2: - cxl_remove_adapter_nr(adapter); -err1: - if (free) - kfree(adapter); + cxl_deconfigure_adapter(adapter); + device_unregister(&adapter->dev); return ERR_PTR(rc); } static void cxl_remove_adapter(struct cxl *adapter) { - struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - - pr_devel("cxl_release_adapter\n"); + pr_devel("cxl_remove_adapter\n"); cxl_sysfs_adapter_remove(adapter); cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); - cxl_unmap_adapter_regs(adapter); - cxl_remove_adapter_nr(adapter); - device_unregister(&adapter->dev); + cxl_deconfigure_adapter(adapter); - pci_release_region(pdev, 0); - pci_release_region(pdev, 2); - pci_disable_device(pdev); + device_unregister(&adapter->dev); } static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) @@ -1046,14 +1280,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) if (cxl_verbose) dump_cxl_config_space(dev); - if ((rc = setup_cxl_bars(dev))) - return rc; - - if ((rc = pci_enable_device(dev))) { - dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); - return rc; - } - adapter = cxl_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); @@ -1061,8 +1287,14 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) } for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = cxl_init_afu(adapter, slice, dev))) + if ((rc = cxl_init_afu(adapter, slice, dev))) { dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + continue; + } + + rc = cxl_afu_select_best_mode(adapter->afu[slice]); + if (rc) + dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } return 0; @@ -1071,22 +1303,277 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) static void cxl_remove(struct pci_dev *dev) { struct cxl *adapter = pci_get_drvdata(dev); - int afu; - - dev_warn(&dev->dev, "pci remove\n"); + struct cxl_afu *afu; + int i; /* * Lock to prevent someone grabbing a ref through the adapter list as * we are removing it */ - for (afu = 0; afu < adapter->slices; afu++) - cxl_remove_afu(adapter->afu[afu]); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_pci_vphb_remove(afu); + cxl_remove_afu(afu); + } cxl_remove_adapter(adapter); } +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + + /* There should only be one entry, but go through the list + * anyway + */ + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler) + afu_result = afu_dev->driver->err_handler->error_detected(afu_dev, + state); + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + return result; +} + +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + int i; + + /* At this point, we could still have an interrupt pending. + * Let's try to get them out of the way before they do + * anything we don't like. + */ + schedule(); + + /* If we're permanently dead, give up. */ + if (state == pci_channel_io_perm_failure) { + /* Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_vphb_error_detected(afu, state); + } + return PCI_ERS_RESULT_DISCONNECT; + } + + /* Are we reflashing? + * + * If we reflash, we could come back as something entirely + * different, including a non-CAPI card. As such, by default + * we don't participate in the process. We'll be unbound and + * the slot re-probed. (TODO: check EEH doesn't blindly rebind + * us!) + * + * However, this isn't the entire story: for reliablity + * reasons, we usually want to reflash the FPGA on PERST in + * order to get back to a more reliable known-good state. + * + * This causes us a bit of a problem: if we reflash we can't + * trust that we'll come back the same - we could have a new + * image and been PERSTed in order to load that + * image. However, most of the time we actually *will* come + * back the same - for example a regular EEH event. + * + * Therefore, we allow the user to assert that the image is + * indeed the same and that we should continue on into EEH + * anyway. + */ + if (adapter->perst_loads_image && !adapter->perst_same_image) { + /* TODO take the PHB out of CXL mode */ + dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* + * At this point, we want to try to recover. We'll always + * need a complete slot reset: we don't trust any other reset. + * + * Now, we go through each AFU: + * - We send the driver, if bound, an error_detected callback. + * We expect it to clean up, but it can also tell us to give + * up and permanently detach the card. To simplify things, if + * any bound AFU driver doesn't support EEH, we give up on EEH. + * + * - We detach all contexts associated with the AFU. This + * does not free them, but puts them into a CLOSED state + * which causes any the associated files to return useful + * errors to userland. It also unmaps, but does not free, + * any IRQs. + * + * - We clean up our side: releasing and unmapping resources we hold + * so we can wire them up again when the hardware comes back up. + * + * Driver authors should note: + * + * - Any contexts you create in your kernel driver (except + * those associated with anonymous file descriptors) are + * your responsibility to free and recreate. Likewise with + * any attached resources. + * + * - We will take responsibility for re-initialising the + * device context (the one set up for you in + * cxl_pci_enable_device_hook and accessed through + * cxl_get_context). If you've attached IRQs or other + * resources to it, they remains yours to free. + * + * You can call the same functions to release resources as you + * normally would: we make sure that these functions continue + * to work when the hardware is down. + * + * Two examples: + * + * 1) If you normally free all your resources at the end of + * each request, or if you use anonymous FDs, your + * error_detected callback can simply set a flag to tell + * your driver not to start any new calls. You can then + * clear the flag in the resume callback. + * + * 2) If you normally allocate your resources on startup: + * * Set a flag in error_detected as above. + * * Let CXL detach your contexts. + * * In slot_reset, free the old resources and allocate new ones. + * * In resume, clear the flag to allow things to start. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + result = cxl_vphb_error_detected(afu, state); + + /* Only continue if everyone agrees on NEED_RESET */ + if (result != PCI_ERS_RESULT_NEED_RESET) + return result; + + cxl_context_detach_all(afu); + cxl_afu_deactivate_mode(afu); + cxl_deconfigure_afu(afu); + } + cxl_deconfigure_adapter(adapter); + + return result; +} + +static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct cxl_context *ctx; + struct pci_dev *afu_dev; + pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int i; + + if (cxl_configure_adapter(adapter, pdev)) + goto err; + + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (cxl_configure_afu(afu, adapter, pdev)) + goto err; + + if (cxl_afu_select_best_mode(afu)) + goto err; + + cxl_pci_vphb_reconfigure(afu); + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + /* Reset the device context. + * TODO: make this less disruptive + */ + ctx = cxl_get_context(afu_dev); + + if (ctx && cxl_release_context(ctx)) + goto err; + + ctx = cxl_dev_context_init(afu_dev); + if (!ctx) + goto err; + + afu_dev->dev.archdata.cxl_ctx = ctx; + + if (cxl_afu_check_and_enable(afu)) + goto err; + + afu_dev->error_state = pci_channel_io_normal; + + /* If there's a driver attached, allow it to + * chime in on recovery. Drivers should check + * if everything has come back OK, but + * shouldn't start new work until we call + * their resume function. + */ + if (!afu_dev->driver) + continue; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev); + + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + } + } + return result; + +err: + /* All the bits that happen in both error_detected and cxl_remove + * should be idempotent, so we don't need to worry about leaving a mix + * of unconfigured and reconfigured resources. + */ + dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n"); + return PCI_ERS_RESULT_DISCONNECT; +} + +static void cxl_pci_resume(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct pci_dev *afu_dev; + int i; + + /* Everything is back now. Drivers should restart work now. + * This is not the place to be checking if everything came back up + * properly, because there's no return value: do that in slot_reset. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (afu_dev->driver && afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + } + } +} + +static const struct pci_error_handlers cxl_err_handler = { + .error_detected = cxl_pci_error_detected, + .slot_reset = cxl_pci_slot_reset, + .resume = cxl_pci_resume, +}; + struct pci_driver cxl_pci_driver = { .name = "cxl-pci", .id_table = cxl_pci_tbl, .probe = cxl_probe, .remove = cxl_remove, + .shutdown = cxl_remove, + .err_handler = &cxl_err_handler, }; diff --git a/kernel/drivers/misc/cxl/sysfs.c b/kernel/drivers/misc/cxl/sysfs.c index d0c38c7bc..02006f710 100644 --- a/kernel/drivers/misc/cxl/sysfs.c +++ b/kernel/drivers/misc/cxl/sysfs.c @@ -112,12 +112,38 @@ static ssize_t load_image_on_perst_store(struct device *device, return count; } +static ssize_t perst_reloads_same_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); +} + +static ssize_t perst_reloads_same_image_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || !(val == 1 || val == 0)) + return -EINVAL; + + adapter->perst_same_image = (val == 1 ? true : false); + return count; +} + static struct device_attribute adapter_attrs[] = { __ATTR_RO(caia_version), __ATTR_RO(psl_revision), __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RW(load_image_on_perst), + __ATTR_RW(perst_reloads_same_image), __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), }; @@ -185,7 +211,7 @@ static ssize_t reset_store_afu(struct device *device, goto err; } - if ((rc = cxl_afu_reset(afu))) + if ((rc = __cxl_afu_reset(afu))) goto err; rc = count; @@ -356,6 +382,16 @@ static ssize_t api_version_compatible_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); } +static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(container_of(kobj, + struct device, kobj)); + + return cxl_afu_read_err_buffer(afu, buf, off, count); +} + static struct device_attribute afu_attrs[] = { __ATTR_RO(mmio_size), __ATTR_RO(irqs_min), @@ -433,12 +469,7 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, struct afu_config_record *cr = to_cr(kobj); struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); - u64 i, j, val, size = afu->crs_len; - - if (off > size) - return 0; - if (off + count > size) - count = size - off; + u64 i, j, val; for (i = 0; i < count;) { val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); @@ -534,6 +565,10 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) struct afu_config_record *cr, *tmp; int i; + /* remove the err buffer bin attribute */ + if (afu->eb_len) + device_remove_bin_file(&afu->dev, &afu->attr_eb); + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) device_remove_file(&afu->dev, &afu_attrs[i]); @@ -555,6 +590,24 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu) goto err; } + /* conditionally create the add the binary file for error info buffer */ + if (afu->eb_len) { + sysfs_attr_init(&afu->attr_eb.attr); + + afu->attr_eb.attr.name = "afu_err_buff"; + afu->attr_eb.attr.mode = S_IRUGO; + afu->attr_eb.size = afu->eb_len; + afu->attr_eb.read = afu_eb_read; + + rc = device_create_bin_file(&afu->dev, &afu->attr_eb); + if (rc) { + dev_err(&afu->dev, + "Unable to create eb attr for the afu. Err(%d)\n", + rc); + goto err; + } + } + for (i = 0; i < afu->crs_num; i++) { cr = cxl_sysfs_afu_new_cr(afu, i); if (IS_ERR(cr)) { @@ -570,6 +623,9 @@ err1: cxl_sysfs_afu_remove(afu); return rc; err: + /* reset the eb_len as we havent created the bin attr */ + afu->eb_len = 0; + for (i--; i >= 0; i--) device_remove_file(&afu->dev, &afu_attrs[i]); return rc; diff --git a/kernel/drivers/misc/cxl/trace.h b/kernel/drivers/misc/cxl/trace.h index ae434d878..6e1e2adfb 100644 --- a/kernel/drivers/misc/cxl/trace.h +++ b/kernel/drivers/misc/cxl/trace.h @@ -105,7 +105,7 @@ TRACE_EVENT(cxl_attach, __entry->num_interrupts = num_interrupts; ), - TP_printk("afu%i.%i pid=%i pe=%i wed=0x%.16llx irqs=%i amr=0x%llx", + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx", __entry->card, __entry->afu, __entry->pid, @@ -177,7 +177,7 @@ TRACE_EVENT(cxl_psl_irq, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -233,7 +233,7 @@ TRACE_EVENT(cxl_ste_miss, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -264,7 +264,7 @@ TRACE_EVENT(cxl_ste_write, __entry->v = v; ), - TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%.16llx V=0x%.16llx", + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx", __entry->card, __entry->afu, __entry->pe, @@ -295,7 +295,7 @@ TRACE_EVENT(cxl_pte_miss, __entry->dar = dar; ), - TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%.16llx", + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx", __entry->card, __entry->afu, __entry->pe, diff --git a/kernel/drivers/misc/cxl/vphb.c b/kernel/drivers/misc/cxl/vphb.c new file mode 100644 index 000000000..cbd4331fb --- /dev/null +++ b/kernel/drivers/misc/cxl/vphb.c @@ -0,0 +1,313 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/pci.h> +#include <misc/cxl.h> +#include "cxl.h" + +static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +{ + if (dma_mask < DMA_BIT_MASK(64)) { + pr_info("%s only 64bit DMA supported on CXL", __func__); + return -EIO; + } + + *(pdev->dev.dma_mask) = dma_mask; + return 0; +} + +static int cxl_pci_probe_mode(struct pci_bus *bus) +{ + return PCI_PROBE_NORMAL; +} + +static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + return -ENODEV; +} + +static void cxl_teardown_msi_irqs(struct pci_dev *pdev) +{ + /* + * MSI should never be set but need still need to provide this call + * back. + */ +} + +static bool cxl_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + struct cxl_context *ctx; + + phb = pci_bus_to_host(dev->bus); + afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + + set_dma_ops(&dev->dev, &dma_direct_ops); + set_dma_offset(&dev->dev, PAGE_OFFSET); + + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (!ctx) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} + +static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + return 1; +} + +static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) +{ + /* Should we do an AFU reset here ? */ +} + +static int cxl_pcie_cfg_record(u8 bus, u8 devfn) +{ + return (bus << 8) + devfn; +} + +static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb, + u8 bus, u8 devfn, int offset) +{ + int record = cxl_pcie_cfg_record(bus, devfn); + + return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset; +} + + +static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, + int offset, int len, + volatile void __iomem **ioaddr, + u32 *mask, int *shift) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + unsigned long addr; + + phb = pci_bus_to_host(bus); + if (phb == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + afu = (struct cxl_afu *)phb->private_data; + + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= (unsigned long)phb->cfg_data) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset); + + *ioaddr = (void *)(addr & ~0x3ULL); + *shift = ((addr & 0x3) * 8); + switch (len) { + case 1: + *mask = 0xff; + break; + case 2: + *mask = 0xffff; + break; + default: + *mask = 0xffffffff; + break; + } + return 0; +} + + +static inline bool cxl_config_link_ok(struct pci_bus *bus) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + + /* Config space IO is based on phb->cfg_addr, which is based on + * afu_desc_mmio. This isn't safe to read/write when the link + * goes down, as EEH tears down MMIO space. + * + * Check if the link is OK before proceeding. + */ + + phb = pci_bus_to_host(bus); + if (phb == NULL) + return false; + afu = (struct cxl_afu *)phb->private_data; + return cxl_adapter_link_ok(afu->adapter); +} + +static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + volatile void __iomem *ioaddr; + int shift, rc; + u32 mask; + + rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, + &mask, &shift); + if (rc) + return rc; + + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* Can only read 32 bits */ + *val = (in_le32(ioaddr) >> shift) & mask; + return PCIBIOS_SUCCESSFUL; +} + +static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + volatile void __iomem *ioaddr; + u32 v, mask; + int shift, rc; + + rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, + &mask, &shift); + if (rc) + return rc; + + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* Can only write 32 bits so do read-modify-write */ + mask <<= shift; + val <<= shift; + + v = (in_le32(ioaddr) & ~mask) | (val & mask); + + out_le32(ioaddr, v); + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops cxl_pcie_pci_ops = +{ + .read = cxl_pcie_read_config, + .write = cxl_pcie_write_config, +}; + + +static struct pci_controller_ops cxl_pci_controller_ops = +{ + .probe_mode = cxl_pci_probe_mode, + .enable_device_hook = cxl_pci_enable_device_hook, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, + .window_alignment = cxl_pci_window_alignment, + .reset_secondary_bus = cxl_pci_reset_secondary_bus, + .setup_msi_irqs = cxl_setup_msi_irqs, + .teardown_msi_irqs = cxl_teardown_msi_irqs, + .dma_set_mask = cxl_dma_set_mask, +}; + +int cxl_pci_vphb_add(struct cxl_afu *afu) +{ + struct pci_dev *phys_dev; + struct pci_controller *phb, *phys_phb; + + phys_dev = to_pci_dev(afu->adapter->dev.parent); + phys_phb = pci_bus_to_host(phys_dev->bus); + + /* Alloc and setup PHB data structure */ + phb = pcibios_alloc_controller(phys_phb->dn); + + if (!phb) + return -ENODEV; + + /* Setup parent in sysfs */ + phb->parent = &phys_dev->dev; + + /* Setup the PHB using arch provided callback */ + phb->ops = &cxl_pcie_pci_ops; + phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; + phb->cfg_data = (void *)(u64)afu->crs_len; + phb->private_data = afu; + phb->controller_ops = cxl_pci_controller_ops; + + /* Scan the bus */ + pcibios_scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; + + /* Claim resources. This might need some rework as well depending + * whether we are doing probe-only or not, like assigning unassigned + * resources etc... + */ + pcibios_claim_one_bus(phb->bus); + + /* Add probed PCI devices to the device model */ + pci_bus_add_devices(phb->bus); + + afu->phb = phb; + + return 0; +} + +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu) +{ + /* When we are reconfigured, the AFU's MMIO space is unmapped + * and remapped. We need to reflect this in the PHB's view of + * the world. + */ + afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; +} + +void cxl_pci_vphb_remove(struct cxl_afu *afu) +{ + struct pci_controller *phb; + + /* If there is no configuration record we won't have one of these */ + if (!afu || !afu->phb) + return; + + phb = afu->phb; + afu->phb = NULL; + + pci_remove_root_bus(phb->bus); + pcibios_free_controller(phb); +} + +struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (struct cxl_afu *)phb->private_data; +} +EXPORT_SYMBOL_GPL(cxl_pci_to_afu); + +unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev) +{ + return cxl_pcie_cfg_record(dev->bus->number, dev->devfn); +} +EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record); diff --git a/kernel/drivers/misc/ds1682.c b/kernel/drivers/misc/ds1682.c index b909fb302..c7112276a 100644 --- a/kernel/drivers/misc/ds1682.c +++ b/kernel/drivers/misc/ds1682.c @@ -148,12 +148,6 @@ static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj, dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n", buf, off, count); - if (off >= DS1682_EEPROM_SIZE) - return 0; - - if (off + count > DS1682_EEPROM_SIZE) - count = DS1682_EEPROM_SIZE - off; - rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off, count, buf); if (rc < 0) @@ -171,12 +165,6 @@ static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj, dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n", buf, off, count); - if (off >= DS1682_EEPROM_SIZE) - return -ENOSPC; - - if (off + count > DS1682_EEPROM_SIZE) - count = DS1682_EEPROM_SIZE - off; - /* Write out to the device */ if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off, count, buf) < 0) diff --git a/kernel/drivers/misc/eeprom/Kconfig b/kernel/drivers/misc/eeprom/Kconfig index 9536852fd..04f2e1fa9 100644 --- a/kernel/drivers/misc/eeprom/Kconfig +++ b/kernel/drivers/misc/eeprom/Kconfig @@ -96,17 +96,4 @@ config EEPROM_DIGSY_MTC_CFG If unsure, say N. -config EEPROM_SUNXI_SID - tristate "Allwinner sunxi security ID support" - depends on ARCH_SUNXI && SYSFS - help - This is a driver for the 'security ID' available on various Allwinner - devices. - - Due to the potential risks involved with changing e-fuses, - this driver is read-only. - - This driver can also be built as a module. If so, the module - will be called sunxi_sid. - endmenu diff --git a/kernel/drivers/misc/eeprom/Makefile b/kernel/drivers/misc/eeprom/Makefile index 9507aec95..fc1e81d29 100644 --- a/kernel/drivers/misc/eeprom/Makefile +++ b/kernel/drivers/misc/eeprom/Makefile @@ -4,5 +4,4 @@ obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o obj-$(CONFIG_EEPROM_MAX6875) += max6875.o obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o -obj-$(CONFIG_EEPROM_SUNXI_SID) += sunxi_sid.o obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o diff --git a/kernel/drivers/misc/eeprom/at24.c b/kernel/drivers/misc/eeprom/at24.c index 2d3db81be..5d7c0900f 100644 --- a/kernel/drivers/misc/eeprom/at24.c +++ b/kernel/drivers/misc/eeprom/at24.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/jiffies.h> #include <linux/of.h> +#include <linux/acpi.h> #include <linux/i2c.h> #include <linux/platform_data/at24.h> @@ -131,6 +132,12 @@ static const struct i2c_device_id at24_ids[] = { }; MODULE_DEVICE_TABLE(i2c, at24_ids); +static const struct acpi_device_id at24_acpi_ids[] = { + { "INT3499", AT24_DEVICE_MAGIC(8192 / 8, 0) }, + { } +}; +MODULE_DEVICE_TABLE(acpi, at24_acpi_ids); + /*-------------------------------------------------------------------------*/ /* @@ -186,19 +193,11 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, if (count > io_limit) count = io_limit; - switch (at24->use_smbus) { - case I2C_SMBUS_I2C_BLOCK_DATA: + if (at24->use_smbus) { /* Smaller eeproms can work given some SMBus extension calls */ if (count > I2C_SMBUS_BLOCK_MAX) count = I2C_SMBUS_BLOCK_MAX; - break; - case I2C_SMBUS_WORD_DATA: - count = 2; - break; - case I2C_SMBUS_BYTE_DATA: - count = 1; - break; - default: + } else { /* * When we have a better choice than SMBus calls, use a * combined I2C message. Write address; then read up to @@ -229,27 +228,10 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, timeout = jiffies + msecs_to_jiffies(write_timeout); do { read_time = jiffies; - switch (at24->use_smbus) { - case I2C_SMBUS_I2C_BLOCK_DATA: - status = i2c_smbus_read_i2c_block_data(client, offset, - count, buf); - break; - case I2C_SMBUS_WORD_DATA: - status = i2c_smbus_read_word_data(client, offset); - if (status >= 0) { - buf[0] = status & 0xff; - buf[1] = status >> 8; - status = count; - } - break; - case I2C_SMBUS_BYTE_DATA: - status = i2c_smbus_read_byte_data(client, offset); - if (status >= 0) { - buf[0] = status; - status = count; - } - break; - default: + if (at24->use_smbus) { + status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset, + count, buf); + } else { status = i2c_transfer(client->adapter, msg, 2); if (status == 2) status = count; @@ -438,9 +420,6 @@ static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj, { struct at24_data *at24; - if (unlikely(off >= attr->size)) - return -EFBIG; - at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); return at24_write(at24, buf, off, count); } @@ -495,21 +474,29 @@ static void at24_get_ofdata(struct i2c_client *client, static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct at24_platform_data chip; + kernel_ulong_t magic = 0; bool writable; int use_smbus = 0; int use_smbus_write = 0; struct at24_data *at24; int err; unsigned i, num_addresses; - kernel_ulong_t magic; if (client->dev.platform_data) { chip = *(struct at24_platform_data *)client->dev.platform_data; } else { - if (!id->driver_data) + if (id) { + magic = id->driver_data; + } else { + const struct acpi_device_id *aid; + + aid = acpi_match_device(at24_acpi_ids, &client->dev); + if (aid) + magic = aid->driver_data; + } + if (!magic) return -ENODEV; - magic = id->driver_data; chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN)); magic >>= AT24_SIZE_BYTELEN; chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS); @@ -689,7 +676,7 @@ static int at24_remove(struct i2c_client *client) static struct i2c_driver at24_driver = { .driver = { .name = "at24", - .owner = THIS_MODULE, + .acpi_match_table = ACPI_PTR(at24_acpi_ids), }, .probe = at24_probe, .remove = at24_remove, diff --git a/kernel/drivers/misc/eeprom/at25.c b/kernel/drivers/misc/eeprom/at25.c index 0a1af93ec..f850ef556 100644 --- a/kernel/drivers/misc/eeprom/at25.c +++ b/kernel/drivers/misc/eeprom/at25.c @@ -462,7 +462,6 @@ MODULE_DEVICE_TABLE(of, at25_of_match); static struct spi_driver at25_driver = { .driver = { .name = "at25", - .owner = THIS_MODULE, .of_match_table = at25_of_match, }, .probe = at25_probe, diff --git a/kernel/drivers/misc/eeprom/eeprom.c b/kernel/drivers/misc/eeprom/eeprom.c index b432873de..7342fd637 100644 --- a/kernel/drivers/misc/eeprom/eeprom.c +++ b/kernel/drivers/misc/eeprom/eeprom.c @@ -88,11 +88,6 @@ static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, struct eeprom_data *data = i2c_get_clientdata(client); u8 slice; - if (off > EEPROM_SIZE) - return 0; - if (off + count > EEPROM_SIZE) - count = EEPROM_SIZE - off; - /* Only refresh slices which contain requested bytes */ for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) eeprom_update_client(client, slice); diff --git a/kernel/drivers/misc/eeprom/eeprom_93xx46.c b/kernel/drivers/misc/eeprom/eeprom_93xx46.c index 9ebeacdb8..ff63f05ed 100644 --- a/kernel/drivers/misc/eeprom/eeprom_93xx46.c +++ b/kernel/drivers/misc/eeprom/eeprom_93xx46.c @@ -48,13 +48,6 @@ eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj, dev = container_of(kobj, struct device, kobj); edev = dev_get_drvdata(dev); - if (unlikely(off >= edev->bin.size)) - return 0; - if ((off + count) > edev->bin.size) - count = edev->bin.size - off; - if (unlikely(!count)) - return count; - cmd_addr = OP_READ << edev->addrlen; if (edev->addrlen == 7) { @@ -200,13 +193,6 @@ eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj, dev = container_of(kobj, struct device, kobj); edev = dev_get_drvdata(dev); - if (unlikely(off >= edev->bin.size)) - return -EFBIG; - if ((off + count) > edev->bin.size) - count = edev->bin.size - off; - if (unlikely(!count)) - return count; - /* only write even number of bytes on 16-bit devices */ if (edev->addrlen == 6) { step = 2; @@ -384,7 +370,6 @@ static int eeprom_93xx46_remove(struct spi_device *spi) static struct spi_driver eeprom_93xx46_driver = { .driver = { .name = "93xx46", - .owner = THIS_MODULE, }, .probe = eeprom_93xx46_probe, .remove = eeprom_93xx46_remove, diff --git a/kernel/drivers/misc/eeprom/max6875.c b/kernel/drivers/misc/eeprom/max6875.c index 580ff9df5..e4dd93b25 100644 --- a/kernel/drivers/misc/eeprom/max6875.c +++ b/kernel/drivers/misc/eeprom/max6875.c @@ -114,12 +114,6 @@ static ssize_t max6875_read(struct file *filp, struct kobject *kobj, struct max6875_data *data = i2c_get_clientdata(client); int slice, max_slice; - if (off > USER_EEPROM_SIZE) - return 0; - - if (off + count > USER_EEPROM_SIZE) - count = USER_EEPROM_SIZE - off; - /* refresh slices which contain requested bytes */ max_slice = (off + count - 1) >> SLICE_BITS; for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++) @@ -197,6 +191,7 @@ static const struct i2c_device_id max6875_id[] = { { "max6875", 0 }, { } }; +MODULE_DEVICE_TABLE(i2c, max6875_id); static struct i2c_driver max6875_driver = { .driver = { diff --git a/kernel/drivers/misc/eeprom/sunxi_sid.c b/kernel/drivers/misc/eeprom/sunxi_sid.c deleted file mode 100644 index 8385177ff..000000000 --- a/kernel/drivers/misc/eeprom/sunxi_sid.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2013 Oliver Schinagl <oliver@schinagl.nl> - * http://www.linux-sunxi.org - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * This driver exposes the Allwinner security ID, efuses exported in byte- - * sized chunks. - */ - -#include <linux/compiler.h> -#include <linux/device.h> -#include <linux/err.h> -#include <linux/export.h> -#include <linux/fs.h> -#include <linux/io.h> -#include <linux/kernel.h> -#include <linux/kobject.h> -#include <linux/module.h> -#include <linux/of_device.h> -#include <linux/platform_device.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/sysfs.h> -#include <linux/types.h> - -#define DRV_NAME "sunxi-sid" - -struct sunxi_sid_data { - void __iomem *reg_base; - unsigned int keysize; -}; - -/* We read the entire key, due to a 32 bit read alignment requirement. Since we - * want to return the requested byte, this results in somewhat slower code and - * uses 4 times more reads as needed but keeps code simpler. Since the SID is - * only very rarely probed, this is not really an issue. - */ -static u8 sunxi_sid_read_byte(const struct sunxi_sid_data *sid_data, - const unsigned int offset) -{ - u32 sid_key; - - if (offset >= sid_data->keysize) - return 0; - - sid_key = ioread32be(sid_data->reg_base + round_down(offset, 4)); - sid_key >>= (offset % 4) * 8; - - return sid_key; /* Only return the last byte */ -} - -static ssize_t sid_read(struct file *fd, struct kobject *kobj, - struct bin_attribute *attr, char *buf, - loff_t pos, size_t size) -{ - struct platform_device *pdev; - struct sunxi_sid_data *sid_data; - int i; - - pdev = to_platform_device(kobj_to_dev(kobj)); - sid_data = platform_get_drvdata(pdev); - - if (pos < 0 || pos >= sid_data->keysize) - return 0; - if (size > sid_data->keysize - pos) - size = sid_data->keysize - pos; - - for (i = 0; i < size; i++) - buf[i] = sunxi_sid_read_byte(sid_data, pos + i); - - return i; -} - -static struct bin_attribute sid_bin_attr = { - .attr = { .name = "eeprom", .mode = S_IRUGO, }, - .read = sid_read, -}; - -static int sunxi_sid_remove(struct platform_device *pdev) -{ - device_remove_bin_file(&pdev->dev, &sid_bin_attr); - dev_dbg(&pdev->dev, "driver unloaded\n"); - - return 0; -} - -static const struct of_device_id sunxi_sid_of_match[] = { - { .compatible = "allwinner,sun4i-a10-sid", .data = (void *)16}, - { .compatible = "allwinner,sun7i-a20-sid", .data = (void *)512}, - {/* sentinel */}, -}; -MODULE_DEVICE_TABLE(of, sunxi_sid_of_match); - -static int sunxi_sid_probe(struct platform_device *pdev) -{ - struct sunxi_sid_data *sid_data; - struct resource *res; - const struct of_device_id *of_dev_id; - u8 *entropy; - unsigned int i; - - sid_data = devm_kzalloc(&pdev->dev, sizeof(struct sunxi_sid_data), - GFP_KERNEL); - if (!sid_data) - return -ENOMEM; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - sid_data->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(sid_data->reg_base)) - return PTR_ERR(sid_data->reg_base); - - of_dev_id = of_match_device(sunxi_sid_of_match, &pdev->dev); - if (!of_dev_id) - return -ENODEV; - sid_data->keysize = (int)of_dev_id->data; - - platform_set_drvdata(pdev, sid_data); - - sid_bin_attr.size = sid_data->keysize; - if (device_create_bin_file(&pdev->dev, &sid_bin_attr)) - return -ENODEV; - - entropy = kzalloc(sizeof(u8) * sid_data->keysize, GFP_KERNEL); - for (i = 0; i < sid_data->keysize; i++) - entropy[i] = sunxi_sid_read_byte(sid_data, i); - add_device_randomness(entropy, sid_data->keysize); - kfree(entropy); - - dev_dbg(&pdev->dev, "loaded\n"); - - return 0; -} - -static struct platform_driver sunxi_sid_driver = { - .probe = sunxi_sid_probe, - .remove = sunxi_sid_remove, - .driver = { - .name = DRV_NAME, - .of_match_table = sunxi_sid_of_match, - }, -}; -module_platform_driver(sunxi_sid_driver); - -MODULE_AUTHOR("Oliver Schinagl <oliver@schinagl.nl>"); -MODULE_DESCRIPTION("Allwinner sunxi security id driver"); -MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/misc/genwqe/card_base.h b/kernel/drivers/misc/genwqe/card_base.h index e73534498..cb851c14c 100644 --- a/kernel/drivers/misc/genwqe/card_base.h +++ b/kernel/drivers/misc/genwqe/card_base.h @@ -514,7 +514,7 @@ int __genwqe_execute_ddcb(struct genwqe_dev *cd, /** * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation * - * This version will not do address translation or any modifcation of + * This version will not do address translation or any modification of * the DDCB data. It is used e.g. for the MoveFlash DDCB which is * entirely prepared by the driver itself. That means the appropriate * DMA addresses are already in the DDCB and do not need any diff --git a/kernel/drivers/misc/genwqe/card_ddcb.c b/kernel/drivers/misc/genwqe/card_ddcb.c index 6d51e5f08..353ee0cc7 100644 --- a/kernel/drivers/misc/genwqe/card_ddcb.c +++ b/kernel/drivers/misc/genwqe/card_ddcb.c @@ -203,7 +203,7 @@ struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) { struct ddcb_requ *req; - req = kzalloc(sizeof(*req), GFP_ATOMIC); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return NULL; diff --git a/kernel/drivers/misc/genwqe/card_dev.c b/kernel/drivers/misc/genwqe/card_dev.c index c49d24426..7f1b282d7 100644 --- a/kernel/drivers/misc/genwqe/card_dev.c +++ b/kernel/drivers/misc/genwqe/card_dev.c @@ -418,7 +418,7 @@ static void genwqe_vma_close(struct vm_area_struct *vma) kfree(dma_map); } -static struct vm_operations_struct genwqe_vma_ops = { +static const struct vm_operations_struct genwqe_vma_ops = { .open = genwqe_vma_open, .close = genwqe_vma_close, }; @@ -449,7 +449,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) if (get_order(vsize) > MAX_ORDER) return -ENOMEM; - dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); if (dma_map == NULL) return -ENOMEM; @@ -785,7 +785,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) map_addr = (m->addr & PAGE_MASK); map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); - dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); if (dma_map == NULL) return -ENOMEM; diff --git a/kernel/drivers/misc/genwqe/card_utils.c b/kernel/drivers/misc/genwqe/card_utils.c index 1ca94e6fa..222367cc8 100644 --- a/kernel/drivers/misc/genwqe/card_utils.c +++ b/kernel/drivers/misc/genwqe/card_utils.c @@ -220,7 +220,8 @@ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, if (get_order(size) > MAX_ORDER) return NULL; - return pci_alloc_consistent(cd->pci_dev, size, dma_handle); + return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, + GFP_KERNEL); } void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, @@ -229,7 +230,7 @@ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, if (vaddr == NULL) return; - pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle); + dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); } static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, diff --git a/kernel/drivers/misc/hpilo.c b/kernel/drivers/misc/hpilo.c index b83e3ca12..d6a901cd4 100644 --- a/kernel/drivers/misc/hpilo.c +++ b/kernel/drivers/misc/hpilo.c @@ -2,7 +2,7 @@ * Driver for the HP iLO management processor. * * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. - * David Altobelli <david.altobelli@hp.com> + * David Altobelli <david.altobelli@hpe.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -902,11 +902,11 @@ static void __exit ilo_exit(void) MODULE_VERSION("1.4.1"); MODULE_ALIAS(ILO_NAME); MODULE_DESCRIPTION(ILO_NAME); -MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>"); +MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>"); MODULE_LICENSE("GPL v2"); module_param(max_ccb, uint, 0444); -MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (16)"); +MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)"); module_init(ilo_init); module_exit(ilo_exit); diff --git a/kernel/drivers/misc/hwlat_detector.c b/kernel/drivers/misc/hwlat_detector.c index 2429c4331..52f5ad5fd 100644 --- a/kernel/drivers/misc/hwlat_detector.c +++ b/kernel/drivers/misc/hwlat_detector.c @@ -616,7 +616,7 @@ static ssize_t debug_enable_fwrite(struct file *filp, buf[sizeof(buf)-1] = '\0'; /* just in case */ err = kstrtoul(buf, 10, &val); - if (0 != err) + if (err) return -EINVAL; if (val) { @@ -921,7 +921,7 @@ static ssize_t debug_width_fwrite(struct file *filp, buf[U64STR_SIZE-1] = '\0'; /* just in case */ err = kstrtoull(buf, 10, &val); - if (0 != err) + if (err) return -EINVAL; mutex_lock(&data.lock); @@ -1005,7 +1005,7 @@ static ssize_t debug_window_fwrite(struct file *filp, buf[U64STR_SIZE-1] = '\0'; /* just in case */ err = kstrtoull(buf, 10, &val); - if (0 != err) + if (err) return -EINVAL; mutex_lock(&data.lock); @@ -1198,11 +1198,11 @@ static int detector_init(void) pr_info(BANNER "version %s\n", VERSION); ret = init_stats(); - if (0 != ret) + if (ret) goto out; ret = init_debugfs(); - if (0 != ret) + if (ret) goto err_stats; if (enabled) diff --git a/kernel/drivers/misc/isl29003.c b/kernel/drivers/misc/isl29003.c index 12c30b486..976df0013 100644 --- a/kernel/drivers/misc/isl29003.c +++ b/kernel/drivers/misc/isl29003.c @@ -465,7 +465,6 @@ MODULE_DEVICE_TABLE(i2c, isl29003_id); static struct i2c_driver isl29003_driver = { .driver = { .name = ISL29003_DRV_NAME, - .owner = THIS_MODULE, .pm = ISL29003_PM_OPS, }, .probe = isl29003_probe, diff --git a/kernel/drivers/misc/kgdbts.c b/kernel/drivers/misc/kgdbts.c index 36f5d5277..99635dd9d 100644 --- a/kernel/drivers/misc/kgdbts.c +++ b/kernel/drivers/misc/kgdbts.c @@ -220,7 +220,7 @@ static unsigned long lookup_addr(char *arg) else if (!strcmp(arg, "sys_open")) addr = (unsigned long)do_sys_open; else if (!strcmp(arg, "do_fork")) - addr = (unsigned long)do_fork; + addr = (unsigned long)_do_fork; else if (!strcmp(arg, "hw_break_val")) addr = (unsigned long)&hw_break_val; addr = (unsigned long) dereference_function_descriptor((void *)addr); @@ -1112,6 +1112,7 @@ static int __init init_kgdbts(void) return configure_kgdbts(); } +device_initcall(init_kgdbts); static int kgdbts_get_char(void) { @@ -1180,10 +1181,9 @@ static struct kgdb_io kgdbts_io_ops = { .post_exception = kgdbts_post_exp_handler, }; -module_init(init_kgdbts); +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param here. + */ module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]"); -MODULE_DESCRIPTION("KGDB Test Suite"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Wind River Systems, Inc."); - diff --git a/kernel/drivers/misc/lattice-ecp3-config.c b/kernel/drivers/misc/lattice-ecp3-config.c index c544f1f50..626fdcaf2 100644 --- a/kernel/drivers/misc/lattice-ecp3-config.c +++ b/kernel/drivers/misc/lattice-ecp3-config.c @@ -235,7 +235,6 @@ MODULE_DEVICE_TABLE(spi, lattice_ecp3_id); static struct spi_driver lattice_ecp3_driver = { .driver = { .name = "lattice-ecp3", - .owner = THIS_MODULE, }, .probe = lattice_ecp3_probe, .remove = lattice_ecp3_remove, diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c index 4739689d2..fb8705fc3 100644 --- a/kernel/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c @@ -115,7 +115,7 @@ static int param_set_axis(const char *val, const struct kernel_param *kp) return ret; } -static struct kernel_param_ops param_ops_axis = { +static const struct kernel_param_ops param_ops_axis = { .set = param_set_axis, .get = param_get_int, }; diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c index e3e7f1dc2..0c3bb7e3e 100644 --- a/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -274,7 +274,6 @@ static const struct dev_pm_ops lis3_pm_ops = { static struct i2c_driver lis3lv02d_i2c_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .pm = &lis3_pm_ops, .of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids), }, diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c index b2f6e1651..e57547512 100644 --- a/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c +++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -138,7 +138,6 @@ static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend, static struct spi_driver lis302dl_spi_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .pm = &lis3lv02d_spi_pm, .of_match_table = of_match_ptr(lis302dl_spi_dt_ids), }, diff --git a/kernel/drivers/misc/lkdtm.c b/kernel/drivers/misc/lkdtm.c index b5abe3412..11fdadc68 100644 --- a/kernel/drivers/misc/lkdtm.c +++ b/kernel/drivers/misc/lkdtm.c @@ -472,7 +472,7 @@ static void lkdtm_do_action(enum ctype which) break; } case CT_ACCESS_USERSPACE: { - unsigned long user_addr, tmp; + unsigned long user_addr, tmp = 0; unsigned long *ptr; user_addr = vm_mmap(NULL, 0, PAGE_SIZE, @@ -483,6 +483,12 @@ static void lkdtm_do_action(enum ctype which) return; } + if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) { + pr_warn("copy_to_user failed\n"); + vm_munmap(user_addr, PAGE_SIZE); + return; + } + ptr = (unsigned long *)user_addr; pr_info("attempting bad read at %p\n", ptr); diff --git a/kernel/drivers/misc/mei/Makefile b/kernel/drivers/misc/mei/Makefile index 518914a82..01447ca21 100644 --- a/kernel/drivers/misc/mei/Makefile +++ b/kernel/drivers/misc/mei/Makefile @@ -11,7 +11,7 @@ mei-objs += main.o mei-objs += amthif.o mei-objs += wd.o mei-objs += bus.o -mei-objs += nfc.o +mei-objs += bus-fixup.o mei-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o diff --git a/kernel/drivers/misc/mei/amthif.c b/kernel/drivers/misc/mei/amthif.c index d2cd53e3f..cd0403f09 100644 --- a/kernel/drivers/misc/mei/amthif.c +++ b/kernel/drivers/misc/mei/amthif.c @@ -59,46 +59,29 @@ void mei_amthif_reset_params(struct mei_device *dev) * mei_amthif_host_init - mei initialization amthif client. * * @dev: the device structure + * @me_cl: me client * * Return: 0 on success, <0 on failure. */ -int mei_amthif_host_init(struct mei_device *dev) +int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl) { struct mei_cl *cl = &dev->iamthif_cl; - struct mei_me_client *me_cl; int ret; dev->iamthif_state = MEI_IAMTHIF_IDLE; mei_cl_init(cl, dev); - me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid); - if (!me_cl) { - dev_info(dev->dev, "amthif: failed to find the client"); - return -ENOTTY; - } - - cl->me_client_id = me_cl->client_id; - cl->cl_uuid = me_cl->props.protocol_name; - - /* Assign iamthif_mtu to the value received from ME */ - - dev->iamthif_mtu = me_cl->props.max_msg_length; - dev_dbg(dev->dev, "IAMTHIF_MTU = %d\n", dev->iamthif_mtu); - - ret = mei_cl_link(cl, MEI_IAMTHIF_HOST_CLIENT_ID); if (ret < 0) { dev_err(dev->dev, "amthif: failed cl_link %d\n", ret); - goto out; + return ret; } - ret = mei_cl_connect(cl, NULL); + ret = mei_cl_connect(cl, me_cl, NULL); dev->iamthif_state = MEI_IAMTHIF_IDLE; -out: - mei_me_cl_put(me_cl); return ret; } @@ -250,7 +233,6 @@ static int mei_amthif_read_start(struct mei_cl *cl, struct file *file) { struct mei_device *dev = cl->dev; struct mei_cl_cb *cb; - size_t length = dev->iamthif_mtu; int rets; cb = mei_io_cb_init(cl, MEI_FOP_READ, file); @@ -259,7 +241,7 @@ static int mei_amthif_read_start(struct mei_cl *cl, struct file *file) goto err; } - rets = mei_io_cb_alloc_buf(cb, length); + rets = mei_io_cb_alloc_buf(cb, mei_cl_mtu(cl)); if (rets) goto err; @@ -476,7 +458,7 @@ void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb) return; } - if (dev->iamthif_canceled != 1) { + if (!dev->iamthif_canceled) { dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE; dev->iamthif_stall_timer = 0; list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list); diff --git a/kernel/drivers/misc/mei/bus-fixup.c b/kernel/drivers/misc/mei/bus-fixup.c new file mode 100644 index 000000000..020de5919 --- /dev/null +++ b/kernel/drivers/misc/mei/bus-fixup.c @@ -0,0 +1,306 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/uuid.h> + +#include <linux/mei_cl_bus.h> + +#include "mei_dev.h" +#include "client.h" + +#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \ + 0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06) + +static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; + +#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \ + 0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c) + +#define MEI_UUID_ANY NULL_UUID_LE + +/** + * number_of_connections - determine whether an client be on the bus + * according number of connections + * We support only clients: + * 1. with single connection + * 2. and fixed clients (max_number_of_connections == 0) + * + * @cldev: me clients device + */ +static void number_of_connections(struct mei_cl_device *cldev) +{ + dev_dbg(&cldev->dev, "running hook %s on %pUl\n", + __func__, mei_me_cl_uuid(cldev->me_cl)); + + if (cldev->me_cl->props.max_number_of_connections > 1) + cldev->do_match = 0; +} + +/** + * blacklist - blacklist a client from the bus + * + * @cldev: me clients device + */ +static void blacklist(struct mei_cl_device *cldev) +{ + dev_dbg(&cldev->dev, "running hook %s on %pUl\n", + __func__, mei_me_cl_uuid(cldev->me_cl)); + cldev->do_match = 0; +} + +struct mei_nfc_cmd { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 data[]; +} __packed; + +struct mei_nfc_reply { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 reply_status; + u8 data[]; +} __packed; + +struct mei_nfc_if_version { + u8 radio_version_sw[3]; + u8 reserved[3]; + u8 radio_version_hw[3]; + u8 i2c_addr; + u8 fw_ivn; + u8 vendor_id; + u8 radio_type; +} __packed; + + +#define MEI_NFC_CMD_MAINTENANCE 0x00 +#define MEI_NFC_SUBCMD_IF_VERSION 0x01 + +/* Vendors */ +#define MEI_NFC_VENDOR_INSIDE 0x00 +#define MEI_NFC_VENDOR_NXP 0x01 + +/* Radio types */ +#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00 +#define MEI_NFC_VENDOR_NXP_PN544 0x01 + +/** + * mei_nfc_if_version - get NFC interface version + * + * @cl: host client (nfc info) + * @ver: NFC interface version to be filled in + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_nfc_if_version(struct mei_cl *cl, + struct mei_nfc_if_version *ver) +{ + struct mei_device *bus; + struct mei_nfc_cmd cmd = { + .command = MEI_NFC_CMD_MAINTENANCE, + .data_size = 1, + .sub_command = MEI_NFC_SUBCMD_IF_VERSION, + }; + struct mei_nfc_reply *reply = NULL; + size_t if_version_length; + int bytes_recv, ret; + + bus = cl->dev; + + WARN_ON(mutex_is_locked(&bus->device_lock)); + + ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd), 1); + if (ret < 0) { + dev_err(bus->dev, "Could not send IF version cmd\n"); + return ret; + } + + /* to be sure on the stack we alloc memory */ + if_version_length = sizeof(struct mei_nfc_reply) + + sizeof(struct mei_nfc_if_version); + + reply = kzalloc(if_version_length, GFP_KERNEL); + if (!reply) + return -ENOMEM; + + ret = 0; + bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length); + if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { + dev_err(bus->dev, "Could not read IF version\n"); + ret = -EIO; + goto err; + } + + memcpy(ver, reply->data, sizeof(struct mei_nfc_if_version)); + + dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n", + ver->fw_ivn, ver->vendor_id, ver->radio_type); + +err: + kfree(reply); + return ret; +} + +/** + * mei_nfc_radio_name - derive nfc radio name from the interface version + * + * @ver: NFC radio version + * + * Return: radio name string + */ +static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver) +{ + + if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) { + if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD) + return "microread"; + } + + if (ver->vendor_id == MEI_NFC_VENDOR_NXP) { + if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544) + return "pn544"; + } + + return NULL; +} + +/** + * mei_nfc - The nfc fixup function. The function retrieves nfc radio + * name and set is as device attribute so we can load + * the proper device driver for it + * + * @cldev: me client device (nfc) + */ +static void mei_nfc(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + struct mei_me_client *me_cl = NULL; + struct mei_nfc_if_version ver; + const char *radio_name = NULL; + int ret; + + bus = cldev->bus; + + dev_dbg(bus->dev, "running hook %s: %pUl match=%d\n", + __func__, mei_me_cl_uuid(cldev->me_cl), cldev->do_match); + + mutex_lock(&bus->device_lock); + /* we need to connect to INFO GUID */ + cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY); + if (IS_ERR(cl)) { + ret = PTR_ERR(cl); + cl = NULL; + dev_err(bus->dev, "nfc hook alloc failed %d\n", ret); + goto out; + } + + me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid); + if (!me_cl) { + ret = -ENOTTY; + dev_err(bus->dev, "Cannot find nfc info %d\n", ret); + goto out; + } + + ret = mei_cl_connect(cl, me_cl, NULL); + if (ret < 0) { + dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n", + ret); + goto out; + } + + mutex_unlock(&bus->device_lock); + + ret = mei_nfc_if_version(cl, &ver); + if (ret) + goto disconnect; + + radio_name = mei_nfc_radio_name(&ver); + + if (!radio_name) { + ret = -ENOENT; + dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n", + ret); + goto disconnect; + } + + dev_dbg(bus->dev, "nfc radio %s\n", radio_name); + strlcpy(cldev->name, radio_name, sizeof(cldev->name)); + +disconnect: + mutex_lock(&bus->device_lock); + if (mei_cl_disconnect(cl) < 0) + dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n"); + + mei_cl_flush_queues(cl, NULL); + +out: + mei_cl_unlink(cl); + mutex_unlock(&bus->device_lock); + mei_me_cl_put(me_cl); + kfree(cl); + + if (ret) + cldev->do_match = 0; + + dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match); +} + +#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook } + +static struct mei_fixup { + + const uuid_le uuid; + void (*hook)(struct mei_cl_device *cldev); +} mei_fixups[] = { + MEI_FIXUP(MEI_UUID_ANY, number_of_connections), + MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist), + MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc), +}; + +/** + * mei_cldev_fixup - run fixup handlers + * + * @cldev: me client device + */ +void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev) +{ + struct mei_fixup *f; + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + int i; + + for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) { + + f = &mei_fixups[i]; + if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 || + uuid_le_cmp(f->uuid, *uuid) == 0) + f->hook(cldev); + } +} + diff --git a/kernel/drivers/misc/mei/bus.c b/kernel/drivers/misc/mei/bus.c index 4cf38c398..0b05aa938 100644 --- a/kernel/drivers/misc/mei/bus.c +++ b/kernel/drivers/misc/mei/bus.c @@ -30,227 +30,41 @@ #define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) #define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) -static int mei_cl_device_match(struct device *dev, struct device_driver *drv) -{ - struct mei_cl_device *device = to_mei_cl_device(dev); - struct mei_cl_driver *driver = to_mei_cl_driver(drv); - const struct mei_cl_device_id *id; - - if (!device) - return 0; - - if (!driver || !driver->id_table) - return 0; - - id = driver->id_table; - - while (id->name[0]) { - if (!strncmp(dev_name(dev), id->name, sizeof(id->name))) - return 1; - - id++; - } - - return 0; -} - -static int mei_cl_device_probe(struct device *dev) -{ - struct mei_cl_device *device = to_mei_cl_device(dev); - struct mei_cl_driver *driver; - struct mei_cl_device_id id; - - if (!device) - return 0; - - driver = to_mei_cl_driver(dev->driver); - if (!driver || !driver->probe) - return -ENODEV; - - dev_dbg(dev, "Device probe\n"); - - strlcpy(id.name, dev_name(dev), sizeof(id.name)); - - return driver->probe(device, &id); -} - -static int mei_cl_device_remove(struct device *dev) -{ - struct mei_cl_device *device = to_mei_cl_device(dev); - struct mei_cl_driver *driver; - - if (!device || !dev->driver) - return 0; - - if (device->event_cb) { - device->event_cb = NULL; - cancel_work_sync(&device->event_work); - } - - driver = to_mei_cl_driver(dev->driver); - if (!driver->remove) { - dev->driver = NULL; - - return 0; - } - - return driver->remove(device); -} - -static ssize_t modalias_show(struct device *dev, struct device_attribute *a, - char *buf) -{ - int len; - - len = snprintf(buf, PAGE_SIZE, "mei:%s\n", dev_name(dev)); - - return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; -} -static DEVICE_ATTR_RO(modalias); - -static struct attribute *mei_cl_dev_attrs[] = { - &dev_attr_modalias.attr, - NULL, -}; -ATTRIBUTE_GROUPS(mei_cl_dev); - -static int mei_cl_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - if (add_uevent_var(env, "MODALIAS=mei:%s", dev_name(dev))) - return -ENOMEM; - - return 0; -} - -static struct bus_type mei_cl_bus_type = { - .name = "mei", - .dev_groups = mei_cl_dev_groups, - .match = mei_cl_device_match, - .probe = mei_cl_device_probe, - .remove = mei_cl_device_remove, - .uevent = mei_cl_uevent, -}; - -static void mei_cl_dev_release(struct device *dev) -{ - kfree(to_mei_cl_device(dev)); -} - -static struct device_type mei_cl_device_type = { - .release = mei_cl_dev_release, -}; - -struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev, - uuid_le uuid) -{ - struct mei_cl *cl; - - list_for_each_entry(cl, &dev->device_list, device_link) { - if (!uuid_le_cmp(uuid, cl->cl_uuid)) - return cl; - } - - return NULL; -} -struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, - uuid_le uuid, char *name, - struct mei_cl_ops *ops) -{ - struct mei_cl_device *device; - struct mei_cl *cl; - int status; - - cl = mei_cl_bus_find_cl_by_uuid(dev, uuid); - if (cl == NULL) - return NULL; - - device = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); - if (!device) - return NULL; - - device->cl = cl; - device->ops = ops; - - device->dev.parent = dev->dev; - device->dev.bus = &mei_cl_bus_type; - device->dev.type = &mei_cl_device_type; - - dev_set_name(&device->dev, "%s", name); - - status = device_register(&device->dev); - if (status) { - dev_err(dev->dev, "Failed to register MEI device\n"); - kfree(device); - return NULL; - } - - cl->device = device; - - dev_dbg(&device->dev, "client %s registered\n", name); - - return device; -} -EXPORT_SYMBOL_GPL(mei_cl_add_device); - -void mei_cl_remove_device(struct mei_cl_device *device) -{ - device_unregister(&device->dev); -} -EXPORT_SYMBOL_GPL(mei_cl_remove_device); - -int __mei_cl_driver_register(struct mei_cl_driver *driver, struct module *owner) -{ - int err; - - driver->driver.name = driver->name; - driver->driver.owner = owner; - driver->driver.bus = &mei_cl_bus_type; - - err = driver_register(&driver->driver); - if (err) - return err; - - pr_debug("mei: driver [%s] registered\n", driver->driver.name); - - return 0; -} -EXPORT_SYMBOL_GPL(__mei_cl_driver_register); - -void mei_cl_driver_unregister(struct mei_cl_driver *driver) -{ - driver_unregister(&driver->driver); - - pr_debug("mei: driver [%s] unregistered\n", driver->driver.name); -} -EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); - -static ssize_t ___mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, +/** + * __mei_cl_send - internal client send (write) + * + * @cl: host client + * @buf: buffer to send + * @length: buffer length + * @blocking: wait for write completion + * + * Return: written size bytes or < 0 on error + */ +ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, bool blocking) { - struct mei_device *dev; - struct mei_me_client *me_cl = NULL; + struct mei_device *bus; struct mei_cl_cb *cb = NULL; ssize_t rets; if (WARN_ON(!cl || !cl->dev)) return -ENODEV; - dev = cl->dev; + bus = cl->dev; - mutex_lock(&dev->device_lock); + mutex_lock(&bus->device_lock); if (!mei_cl_is_connected(cl)) { rets = -ENODEV; goto out; } /* Check if we have an ME client device */ - me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); - if (!me_cl) { + if (!mei_me_cl_is_active(cl->me_cl)) { rets = -ENOTTY; goto out; } - if (length > me_cl->props.max_msg_length) { + if (length > mei_cl_mtu(cl)) { rets = -EFBIG; goto out; } @@ -266,17 +80,25 @@ static ssize_t ___mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, rets = mei_cl_write(cl, cb, blocking); out: - mei_me_cl_put(me_cl); - mutex_unlock(&dev->device_lock); + mutex_unlock(&bus->device_lock); if (rets < 0) mei_io_cb_free(cb); return rets; } +/** + * __mei_cl_recv - internal client receive (read) + * + * @cl: host client + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + */ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) { - struct mei_device *dev; + struct mei_device *bus; struct mei_cl_cb *cb; size_t r_length; ssize_t rets; @@ -284,9 +106,9 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) if (WARN_ON(!cl || !cl->dev)) return -ENODEV; - dev = cl->dev; + bus = cl->dev; - mutex_lock(&dev->device_lock); + mutex_lock(&bus->device_lock); cb = mei_cl_read_cb(cl, NULL); if (cb) @@ -296,9 +118,10 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) if (rets && rets != -EBUSY) goto out; + /* wait on event only if there is no other waiter */ if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) { - mutex_unlock(&dev->device_lock); + mutex_unlock(&bus->device_lock); if (wait_event_interruptible(cl->rx_wait, (!list_empty(&cl->rd_completed)) || @@ -309,7 +132,7 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length) return -ERESTARTSYS; } - mutex_lock(&dev->device_lock); + mutex_lock(&bus->device_lock); if (!mei_cl_is_connected(cl)) { rets = -EBUSY; @@ -336,196 +159,833 @@ copy: free: mei_io_cb_free(cb); out: - mutex_unlock(&dev->device_lock); + mutex_unlock(&bus->device_lock); return rets; } -inline ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length) +/** + * mei_cldev_send - me device send (write) + * + * @cldev: me client device + * @buf: buffer to send + * @length: buffer length + * + * Return: written size in bytes or < 0 on error + */ +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length) { - return ___mei_cl_send(cl, buf, length, 0); -} + struct mei_cl *cl = cldev->cl; -inline ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length) -{ - return ___mei_cl_send(cl, buf, length, 1); + if (cl == NULL) + return -ENODEV; + + return __mei_cl_send(cl, buf, length, 1); } +EXPORT_SYMBOL_GPL(mei_cldev_send); -ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length) +/** + * mei_cldev_recv - client receive (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + */ +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) { - struct mei_cl *cl = device->cl; + struct mei_cl *cl = cldev->cl; if (cl == NULL) return -ENODEV; - if (device->ops && device->ops->send) - return device->ops->send(device, buf, length); + return __mei_cl_recv(cl, buf, length); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv); - return __mei_cl_send(cl, buf, length); +/** + * mei_cl_bus_event_work - dispatch rx event for a bus device + * and schedule new work + * + * @work: work + */ +static void mei_cl_bus_event_work(struct work_struct *work) +{ + struct mei_cl_device *cldev; + + cldev = container_of(work, struct mei_cl_device, event_work); + + if (cldev->event_cb) + cldev->event_cb(cldev, cldev->events, cldev->event_context); + + cldev->events = 0; + + /* Prepare for the next read */ + if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) + mei_cl_read_start(cldev->cl, 0, NULL); } -EXPORT_SYMBOL_GPL(mei_cl_send); -ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length) +/** + * mei_cl_bus_notify_event - schedule notify cb on bus client + * + * @cl: host client + */ +void mei_cl_bus_notify_event(struct mei_cl *cl) { - struct mei_cl *cl = device->cl; + struct mei_cl_device *cldev = cl->cldev; - if (cl == NULL) - return -ENODEV; + if (!cldev || !cldev->event_cb) + return; - if (device->ops && device->ops->recv) - return device->ops->recv(device, buf, length); + if (!(cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF))) + return; - return __mei_cl_recv(cl, buf, length); + if (!cl->notify_ev) + return; + + set_bit(MEI_CL_EVENT_NOTIF, &cldev->events); + + schedule_work(&cldev->event_work); + + cl->notify_ev = false; } -EXPORT_SYMBOL_GPL(mei_cl_recv); -static void mei_bus_event_work(struct work_struct *work) +/** + * mei_cl_bus_rx_event - schedule rx evenet + * + * @cl: host client + */ +void mei_cl_bus_rx_event(struct mei_cl *cl) { - struct mei_cl_device *device; + struct mei_cl_device *cldev = cl->cldev; - device = container_of(work, struct mei_cl_device, event_work); + if (!cldev || !cldev->event_cb) + return; - if (device->event_cb) - device->event_cb(device, device->events, device->event_context); + if (!(cldev->events_mask & BIT(MEI_CL_EVENT_RX))) + return; - device->events = 0; + set_bit(MEI_CL_EVENT_RX, &cldev->events); - /* Prepare for the next read */ - mei_cl_read_start(device->cl, 0, NULL); + schedule_work(&cldev->event_work); } -int mei_cl_register_event_cb(struct mei_cl_device *device, - mei_cl_event_cb_t event_cb, void *context) +/** + * mei_cldev_register_event_cb - register event callback + * + * @cldev: me client devices + * @event_cb: callback function + * @events_mask: requested events bitmask + * @context: driver context data + * + * Return: 0 on success + * -EALREADY if an callback is already registered + * <0 on other errors + */ +int mei_cldev_register_event_cb(struct mei_cl_device *cldev, + unsigned long events_mask, + mei_cldev_event_cb_t event_cb, void *context) { - if (device->event_cb) + int ret; + + if (cldev->event_cb) return -EALREADY; - device->events = 0; - device->event_cb = event_cb; - device->event_context = context; - INIT_WORK(&device->event_work, mei_bus_event_work); + cldev->events = 0; + cldev->events_mask = events_mask; + cldev->event_cb = event_cb; + cldev->event_context = context; + INIT_WORK(&cldev->event_work, mei_cl_bus_event_work); - mei_cl_read_start(device->cl, 0, NULL); + if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) { + ret = mei_cl_read_start(cldev->cl, 0, NULL); + if (ret && ret != -EBUSY) + return ret; + } + + if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) { + mutex_lock(&cldev->cl->dev->device_lock); + ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0); + mutex_unlock(&cldev->cl->dev->device_lock); + if (ret) + return ret; + } return 0; } -EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); +EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb); -void *mei_cl_get_drvdata(const struct mei_cl_device *device) +/** + * mei_cldev_get_drvdata - driver data getter + * + * @cldev: mei client device + * + * Return: driver private data + */ +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev) { - return dev_get_drvdata(&device->dev); + return dev_get_drvdata(&cldev->dev); } -EXPORT_SYMBOL_GPL(mei_cl_get_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata); -void mei_cl_set_drvdata(struct mei_cl_device *device, void *data) +/** + * mei_cldev_set_drvdata - driver data setter + * + * @cldev: mei client device + * @data: data to store + */ +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) { - dev_set_drvdata(&device->dev, data); + dev_set_drvdata(&cldev->dev, data); } -EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); -int mei_cl_enable_device(struct mei_cl_device *device) +/** + * mei_cldev_uuid - return uuid of the underlying me client + * + * @cldev: mei client device + * + * Return: me client uuid + */ +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) { - int err; - struct mei_device *dev; - struct mei_cl *cl = device->cl; - - if (cl == NULL) - return -ENODEV; + return mei_me_cl_uuid(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_uuid); - dev = cl->dev; +/** + * mei_cldev_ver - return protocol version of the underlying me client + * + * @cldev: mei client device + * + * Return: me client protocol version + */ +u8 mei_cldev_ver(const struct mei_cl_device *cldev) +{ + return mei_me_cl_ver(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_ver); - mutex_lock(&dev->device_lock); +/** + * mei_cldev_enabled - check whether the device is enabled + * + * @cldev: mei client device + * + * Return: true if me client is initialized and connected + */ +bool mei_cldev_enabled(struct mei_cl_device *cldev) +{ + return cldev->cl && mei_cl_is_connected(cldev->cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_enabled); - err = mei_cl_connect(cl, NULL); - if (err < 0) { - mutex_unlock(&dev->device_lock); - dev_err(dev->dev, "Could not connect to the ME client"); +/** + * mei_cldev_enable_device - enable me client device + * create connection with me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_enable(struct mei_cl_device *cldev) +{ + struct mei_device *bus = cldev->bus; + struct mei_cl *cl; + int ret; + + cl = cldev->cl; + + if (!cl) { + mutex_lock(&bus->device_lock); + cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY); + mutex_unlock(&bus->device_lock); + if (IS_ERR(cl)) + return PTR_ERR(cl); + /* update pointers */ + cldev->cl = cl; + cl->cldev = cldev; + } - return err; + mutex_lock(&bus->device_lock); + if (mei_cl_is_connected(cl)) { + ret = 0; + goto out; } - mutex_unlock(&dev->device_lock); + if (!mei_me_cl_is_active(cldev->me_cl)) { + dev_err(&cldev->dev, "me client is not active\n"); + ret = -ENOTTY; + goto out; + } - if (device->event_cb) - mei_cl_read_start(device->cl, 0, NULL); + ret = mei_cl_connect(cl, cldev->me_cl, NULL); + if (ret < 0) + dev_err(&cldev->dev, "cannot connect\n"); - if (!device->ops || !device->ops->enable) - return 0; +out: + mutex_unlock(&bus->device_lock); - return device->ops->enable(device); + return ret; } -EXPORT_SYMBOL_GPL(mei_cl_enable_device); +EXPORT_SYMBOL_GPL(mei_cldev_enable); -int mei_cl_disable_device(struct mei_cl_device *device) +/** + * mei_cldev_disable - disable me client device + * disconnect form the me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_disable(struct mei_cl_device *cldev) { + struct mei_device *bus; + struct mei_cl *cl; int err; - struct mei_device *dev; - struct mei_cl *cl = device->cl; - if (cl == NULL) + if (!cldev || !cldev->cl) return -ENODEV; - dev = cl->dev; + cl = cldev->cl; - if (device->ops && device->ops->disable) - device->ops->disable(device); + bus = cldev->bus; - device->event_cb = NULL; + cldev->event_cb = NULL; - mutex_lock(&dev->device_lock); + mutex_lock(&bus->device_lock); if (!mei_cl_is_connected(cl)) { - dev_err(dev->dev, "Already disconnected"); + dev_err(bus->dev, "Already disconnected"); err = 0; goto out; } - cl->state = MEI_FILE_DISCONNECTING; - err = mei_cl_disconnect(cl); - if (err < 0) { - dev_err(dev->dev, "Could not disconnect from the ME client"); - goto out; - } + if (err < 0) + dev_err(bus->dev, "Could not disconnect from the ME client"); +out: /* Flush queues and remove any pending read */ mei_cl_flush_queues(cl, NULL); + mei_cl_unlink(cl); -out: - mutex_unlock(&dev->device_lock); + kfree(cl); + cldev->cl = NULL; + + mutex_unlock(&bus->device_lock); return err; +} +EXPORT_SYMBOL_GPL(mei_cldev_disable); +/** + * mei_cl_device_find - find matching entry in the driver id table + * + * @cldev: me client device + * @cldrv: me client driver + * + * Return: id on success; NULL if no id is matching + */ +static const +struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev, + struct mei_cl_driver *cldrv) +{ + const struct mei_cl_device_id *id; + const uuid_le *uuid; + u8 version; + bool match; + + uuid = mei_me_cl_uuid(cldev->me_cl); + version = mei_me_cl_ver(cldev->me_cl); + + id = cldrv->id_table; + while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) { + if (!uuid_le_cmp(*uuid, id->uuid)) { + match = true; + + if (cldev->name[0]) + if (strncmp(cldev->name, id->name, + sizeof(id->name))) + match = false; + + if (id->version != MEI_CL_VERSION_ANY) + if (id->version != version) + match = false; + if (match) + return id; + } + + id++; + } + + return NULL; } -EXPORT_SYMBOL_GPL(mei_cl_disable_device); -void mei_cl_bus_rx_event(struct mei_cl *cl) +/** + * mei_cl_device_match - device match function + * + * @dev: device + * @drv: driver + * + * Return: 1 if matching device was found 0 otherwise + */ +static int mei_cl_device_match(struct device *dev, struct device_driver *drv) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct mei_cl_driver *cldrv = to_mei_cl_driver(drv); + const struct mei_cl_device_id *found_id; + + if (!cldev) + return 0; + + if (!cldev->do_match) + return 0; + + if (!cldrv || !cldrv->id_table) + return 0; + + found_id = mei_cl_device_find(cldev, cldrv); + if (found_id) + return 1; + + return 0; +} + +/** + * mei_cl_device_probe - bus probe function + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_cl_device_probe(struct device *dev) +{ + struct mei_cl_device *cldev; + struct mei_cl_driver *cldrv; + const struct mei_cl_device_id *id; + + cldev = to_mei_cl_device(dev); + cldrv = to_mei_cl_driver(dev->driver); + + if (!cldev) + return 0; + + if (!cldrv || !cldrv->probe) + return -ENODEV; + + id = mei_cl_device_find(cldev, cldrv); + if (!id) + return -ENODEV; + + __module_get(THIS_MODULE); + + return cldrv->probe(cldev, id); +} + +/** + * mei_cl_device_remove - remove device from the bus + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_cl_device_remove(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct mei_cl_driver *cldrv; + int ret = 0; + + if (!cldev || !dev->driver) + return 0; + + if (cldev->event_cb) { + cldev->event_cb = NULL; + cancel_work_sync(&cldev->event_work); + } + + cldrv = to_mei_cl_driver(dev->driver); + if (cldrv->remove) + ret = cldrv->remove(cldev); + + module_put(THIS_MODULE); + dev->driver = NULL; + return ret; + +} + +static ssize_t name_show(struct device *dev, struct device_attribute *a, + char *buf) { - struct mei_cl_device *device = cl->device; + struct mei_cl_device *cldev = to_mei_cl_device(dev); + size_t len; - if (!device || !device->event_cb) + len = snprintf(buf, PAGE_SIZE, "%s", cldev->name); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(name); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + size_t len; + + len = snprintf(buf, PAGE_SIZE, "%pUl", uuid); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(uuid); + +static ssize_t version_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 version = mei_me_cl_ver(cldev->me_cl); + size_t len; + + len = snprintf(buf, PAGE_SIZE, "%02X", version); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(version); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + size_t len; + + len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid); + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mei_cldev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_uuid.attr, + &dev_attr_version.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mei_cldev); + +/** + * mei_cl_device_uevent - me client bus uevent handler + * + * @dev: device + * @env: uevent kobject + * + * Return: 0 on success -ENOMEM on when add_uevent_var fails + */ +static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + if (add_uevent_var(env, "MEI_CL_VERSION=%d", version)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) + return -ENOMEM; + + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:", + cldev->name, uuid, version)) + return -ENOMEM; + + return 0; +} + +static struct bus_type mei_cl_bus_type = { + .name = "mei", + .dev_groups = mei_cldev_groups, + .match = mei_cl_device_match, + .probe = mei_cl_device_probe, + .remove = mei_cl_device_remove, + .uevent = mei_cl_device_uevent, +}; + +static struct mei_device *mei_dev_bus_get(struct mei_device *bus) +{ + if (bus) + get_device(bus->dev); + + return bus; +} + +static void mei_dev_bus_put(struct mei_device *bus) +{ + if (bus) + put_device(bus->dev); +} + +static void mei_cl_bus_dev_release(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + + if (!cldev) + return; + + mei_me_cl_put(cldev->me_cl); + mei_dev_bus_put(cldev->bus); + kfree(cldev); +} + +static struct device_type mei_cl_device_type = { + .release = mei_cl_bus_dev_release, +}; + +/** + * mei_cl_bus_set_name - set device name for me client device + * + * @cldev: me client device + */ +static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) +{ + dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", + cldev->name, + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); +} + +/** + * mei_cl_bus_dev_alloc - initialize and allocate mei client device + * + * @bus: mei device + * @me_cl: me client + * + * Return: allocated device structur or NULL on allocation failure + */ +static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + + cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + if (!cldev) + return NULL; + + device_initialize(&cldev->dev); + cldev->dev.parent = bus->dev; + cldev->dev.bus = &mei_cl_bus_type; + cldev->dev.type = &mei_cl_device_type; + cldev->bus = mei_dev_bus_get(bus); + cldev->me_cl = mei_me_cl_get(me_cl); + mei_cl_bus_set_name(cldev); + cldev->is_added = 0; + INIT_LIST_HEAD(&cldev->bus_list); + + return cldev; +} + +/** + * mei_cl_dev_setup - setup me client device + * run fix up routines and set the device name + * + * @bus: mei device + * @cldev: me client device + * + * Return: true if the device is eligible for enumeration + */ +static bool mei_cl_bus_dev_setup(struct mei_device *bus, + struct mei_cl_device *cldev) +{ + cldev->do_match = 1; + mei_cl_bus_dev_fixup(cldev); + + /* the device name can change during fix up */ + if (cldev->do_match) + mei_cl_bus_set_name(cldev); + + return cldev->do_match == 1; +} + +/** + * mei_cl_bus_dev_add - add me client devices + * + * @cldev: me client device + * + * Return: 0 on success; < 0 on failre + */ +static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) +{ + int ret; + + dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n", + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); + ret = device_add(&cldev->dev); + if (!ret) + cldev->is_added = 1; + + return ret; +} + +/** + * mei_cl_bus_dev_stop - stop the driver + * + * @cldev: me client device + */ +static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev) +{ + if (cldev->is_added) + device_release_driver(&cldev->dev); +} + +/** + * mei_cl_bus_dev_destroy - destroy me client devices object + * + * @cldev: me client device + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev) +{ + + WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock)); + + if (!cldev->is_added) + return; + + device_del(&cldev->dev); + + list_del_init(&cldev->bus_list); + + cldev->is_added = 0; + put_device(&cldev->dev); +} + +/** + * mei_cl_bus_remove_device - remove a devices form the bus + * + * @cldev: me client device + */ +static void mei_cl_bus_remove_device(struct mei_cl_device *cldev) +{ + mei_cl_bus_dev_stop(cldev); + mei_cl_bus_dev_destroy(cldev); +} + +/** + * mei_cl_bus_remove_devices - remove all devices form the bus + * + * @bus: mei device + */ +void mei_cl_bus_remove_devices(struct mei_device *bus) +{ + struct mei_cl_device *cldev, *next; + + mutex_lock(&bus->cl_bus_lock); + list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list) + mei_cl_bus_remove_device(cldev); + mutex_unlock(&bus->cl_bus_lock); +} + + +/** + * mei_cl_bus_dev_init - allocate and initializes an mei client devices + * based on me client + * + * @bus: mei device + * @me_cl: me client + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_init(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + + WARN_ON(!mutex_is_locked(&bus->cl_bus_lock)); + + dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl)); + + if (me_cl->bus_added) return; - set_bit(MEI_CL_EVENT_RX, &device->events); + cldev = mei_cl_bus_dev_alloc(bus, me_cl); + if (!cldev) + return; + + me_cl->bus_added = true; + list_add_tail(&cldev->bus_list, &bus->device_list); - schedule_work(&device->event_work); } -void mei_cl_bus_remove_devices(struct mei_device *dev) +/** + * mei_cl_bus_rescan - scan me clients list and add create + * devices for eligible clients + * + * @bus: mei device + */ +void mei_cl_bus_rescan(struct mei_device *bus) { - struct mei_cl *cl, *next; + struct mei_cl_device *cldev, *n; + struct mei_me_client *me_cl; + + mutex_lock(&bus->cl_bus_lock); + + down_read(&bus->me_clients_rwsem); + list_for_each_entry(me_cl, &bus->me_clients, list) + mei_cl_bus_dev_init(bus, me_cl); + up_read(&bus->me_clients_rwsem); + + list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) { - mutex_lock(&dev->device_lock); - list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { - if (cl->device) - mei_cl_remove_device(cl->device); + if (!mei_me_cl_is_active(cldev->me_cl)) { + mei_cl_bus_remove_device(cldev); + continue; + } + + if (cldev->is_added) + continue; - list_del(&cl->device_link); - mei_cl_unlink(cl); - kfree(cl); + if (mei_cl_bus_dev_setup(bus, cldev)) + mei_cl_bus_dev_add(cldev); + else { + list_del_init(&cldev->bus_list); + put_device(&cldev->dev); + } } - mutex_unlock(&dev->device_lock); + mutex_unlock(&bus->cl_bus_lock); + + dev_dbg(bus->dev, "rescan end"); } +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner) +{ + int err; + + cldrv->driver.name = cldrv->name; + cldrv->driver.owner = owner; + cldrv->driver.bus = &mei_cl_bus_type; + + err = driver_register(&cldrv->driver); + if (err) + return err; + + pr_debug("mei: driver [%s] registered\n", cldrv->driver.name); + + return 0; +} +EXPORT_SYMBOL_GPL(__mei_cldev_driver_register); + +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv) +{ + driver_unregister(&cldrv->driver); + + pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name); +} +EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister); + + int __init mei_cl_bus_init(void) { return bus_register(&mei_cl_bus_type); diff --git a/kernel/drivers/misc/mei/client.c b/kernel/drivers/misc/mei/client.c index b2b9f4382..a6c87c713 100644 --- a/kernel/drivers/misc/mei/client.c +++ b/kernel/drivers/misc/mei/client.c @@ -83,7 +83,7 @@ void mei_me_cl_put(struct mei_me_client *me_cl) } /** - * __mei_me_cl_del - delete me client form the list and decrease + * __mei_me_cl_del - delete me client from the list and decrease * reference counter * * @dev: mei device @@ -96,11 +96,25 @@ static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) if (!me_cl) return; - list_del(&me_cl->list); + list_del_init(&me_cl->list); mei_me_cl_put(me_cl); } /** + * mei_me_cl_del - delete me client from the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** * mei_me_cl_add - add me client to the list * * @dev: mei device @@ -317,7 +331,7 @@ static inline bool mei_cl_cmp_id(const struct mei_cl *cl1, { return cl1 && cl2 && (cl1->host_client_id == cl2->host_client_id) && - (cl1->me_client_id == cl2->me_client_id); + (mei_cl_me_id(cl1) == mei_cl_me_id(cl2)); } /** @@ -541,11 +555,12 @@ void mei_cl_init(struct mei_cl *cl, struct mei_device *dev) init_waitqueue_head(&cl->wait); init_waitqueue_head(&cl->rx_wait); init_waitqueue_head(&cl->tx_wait); + init_waitqueue_head(&cl->ev_wait); INIT_LIST_HEAD(&cl->rd_completed); INIT_LIST_HEAD(&cl->rd_pending); INIT_LIST_HEAD(&cl->link); - INIT_LIST_HEAD(&cl->device_link); cl->writing_state = MEI_IDLE; + cl->state = MEI_FILE_INITIALIZING; cl->dev = dev; } @@ -619,7 +634,7 @@ int mei_cl_link(struct mei_cl *cl, int id) } /** - * mei_cl_unlink - remove me_cl from the list + * mei_cl_unlink - remove host client from the list * * @cl: host client * @@ -667,24 +682,20 @@ void mei_host_client_init(struct work_struct *work) me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid); if (me_cl) - mei_amthif_host_init(dev); + mei_amthif_host_init(dev, me_cl); mei_me_cl_put(me_cl); me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid); if (me_cl) - mei_wd_host_init(dev); + mei_wd_host_init(dev, me_cl); mei_me_cl_put(me_cl); - me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid); - if (me_cl) - mei_nfc_host_init(dev); - mei_me_cl_put(me_cl); - - dev->dev_state = MEI_DEV_ENABLED; dev->reset_count = 0; mutex_unlock(&dev->device_lock); + mei_cl_bus_rescan(dev); + pm_runtime_mark_last_busy(dev->dev); dev_dbg(dev->dev, "rpm: autosuspend\n"); pm_runtime_autosuspend(dev->dev); @@ -715,6 +726,173 @@ bool mei_hbuf_acquire(struct mei_device *dev) } /** + * mei_cl_set_disconnected - set disconnected state and clear + * associated states and resources + * + * @cl: host client + */ +void mei_cl_set_disconnected(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_DISCONNECTED || + cl->state == MEI_FILE_INITIALIZING) + return; + + cl->state = MEI_FILE_DISCONNECTED; + mei_io_list_flush(&dev->ctrl_rd_list, cl); + mei_io_list_flush(&dev->ctrl_wr_list, cl); + cl->mei_flow_ctrl_creds = 0; + cl->timer_count = 0; + + if (!cl->me_cl) + return; + + if (!WARN_ON(cl->me_cl->connect_count == 0)) + cl->me_cl->connect_count--; + + if (cl->me_cl->connect_count == 0) + cl->me_cl->mei_flow_ctrl_creds = 0; + + mei_me_cl_put(cl->me_cl); + cl->me_cl = NULL; +} + +static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl) +{ + if (!mei_me_cl_get(me_cl)) + return -ENOENT; + + /* only one connection is allowed for fixed address clients */ + if (me_cl->props.fixed_address) { + if (me_cl->connect_count) { + mei_me_cl_put(me_cl); + return -EBUSY; + } + } + + cl->me_cl = me_cl; + cl->state = MEI_FILE_CONNECTING; + cl->me_cl->connect_count++; + + return 0; +} + +/* + * mei_cl_send_disconnect - send disconnect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_disconnect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list.list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + + return 0; +} + +/** + * mei_cl_irq_disconnect - processes close related operation from + * interrupt thread context - send disconnect request + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + + if (slots < msg_slots) + return -EMSGSIZE; + + ret = mei_cl_send_disconnect(cl, cb); + if (ret) + list_move_tail(&cb->list, &cmpl_list->list); + + return ret; +} + +/** + * __mei_cl_disconnect - disconnect host client from the me one + * internal function runtime pm has to be already acquired + * + * @cl: host client + * + * Return: 0 on success, <0 on failure. + */ +static int __mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + dev = cl->dev; + + cl->state = MEI_FILE_DISCONNECTING; + + cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL); + rets = cb ? 0 : -ENOMEM; + if (rets) + goto out; + + cl_dbg(dev, cl, "add disconnect cb to control write list\n"); + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + + if (mei_hbuf_acquire(dev)) { + rets = mei_cl_send_disconnect(cl, cb); + if (rets) { + cl_err(dev, cl, "failed to disconnect.\n"); + goto out; + } + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, cl->state == MEI_FILE_DISCONNECT_REPLY, + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + rets = cl->status; + if (cl->state != MEI_FILE_DISCONNECT_REPLY) { + cl_dbg(dev, cl, "timeout on disconnect from FW client.\n"); + rets = -ETIME; + } + +out: + /* we disconnect also on error */ + mei_cl_set_disconnected(cl); + if (!rets) + cl_dbg(dev, cl, "successfully disconnected from FW client.\n"); + + mei_io_cb_free(cb); + return rets; +} + +/** * mei_cl_disconnect - disconnect host client from the me one * * @cl: host client @@ -726,7 +904,6 @@ bool mei_hbuf_acquire(struct mei_device *dev) int mei_cl_disconnect(struct mei_cl *cl) { struct mei_device *dev; - struct mei_cl_cb *cb; int rets; if (WARN_ON(!cl || !cl->dev)) @@ -736,9 +913,14 @@ int mei_cl_disconnect(struct mei_cl *cl) cl_dbg(dev, cl, "disconnecting"); - if (cl->state != MEI_FILE_DISCONNECTING) + if (!mei_cl_is_connected(cl)) return 0; + if (mei_cl_is_fixed_address(cl)) { + mei_cl_set_disconnected(cl); + return 0; + } + rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { pm_runtime_put_noidle(dev->dev); @@ -746,49 +928,12 @@ int mei_cl_disconnect(struct mei_cl *cl) return rets; } - cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL); - rets = cb ? 0 : -ENOMEM; - if (rets) - goto free; + rets = __mei_cl_disconnect(cl); - if (mei_hbuf_acquire(dev)) { - if (mei_hbm_cl_disconnect_req(dev, cl)) { - rets = -ENODEV; - cl_err(dev, cl, "failed to disconnect.\n"); - goto free; - } - cl->timer_count = MEI_CONNECT_TIMEOUT; - mdelay(10); /* Wait for hardware disconnection ready */ - list_add_tail(&cb->list, &dev->ctrl_rd_list.list); - } else { - cl_dbg(dev, cl, "add disconnect cb to control write list\n"); - list_add_tail(&cb->list, &dev->ctrl_wr_list.list); - - } - mutex_unlock(&dev->device_lock); - - wait_event_timeout(cl->wait, - MEI_FILE_DISCONNECTED == cl->state, - mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); - - mutex_lock(&dev->device_lock); - - if (MEI_FILE_DISCONNECTED == cl->state) { - rets = 0; - cl_dbg(dev, cl, "successfully disconnected from FW client.\n"); - } else { - cl_dbg(dev, cl, "timeout on disconnect from FW client.\n"); - rets = -ETIME; - } - - mei_io_list_flush(&dev->ctrl_rd_list, cl); - mei_io_list_flush(&dev->ctrl_wr_list, cl); -free: cl_dbg(dev, cl, "rpm: autosuspend\n"); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); - mei_io_cb_free(cb); return rets; } @@ -801,53 +946,119 @@ free: * * Return: true if other client is connected, false - otherwise. */ -bool mei_cl_is_other_connecting(struct mei_cl *cl) +static bool mei_cl_is_other_connecting(struct mei_cl *cl) { struct mei_device *dev; - struct mei_cl *ocl; /* the other client */ - - if (WARN_ON(!cl || !cl->dev)) - return false; + struct mei_cl_cb *cb; dev = cl->dev; - list_for_each_entry(ocl, &dev->file_list, link) { - if (ocl->state == MEI_FILE_CONNECTING && - ocl != cl && - cl->me_client_id == ocl->me_client_id) + list_for_each_entry(cb, &dev->ctrl_rd_list.list, list) { + if (cb->fop_type == MEI_FOP_CONNECT && + mei_cl_me_id(cl) == mei_cl_me_id(cb->cl)) return true; - } return false; } /** + * mei_cl_send_connect - send connect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_connect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list.list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + return 0; +} + +/** + * mei_cl_irq_connect - send connect request in irq_thread context + * + * @cl: host client + * @cb: callback block + * @cmpl_list: complete list + * + * Return: 0, OK; otherwise, error. + */ +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int rets; + + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + + if (mei_cl_is_other_connecting(cl)) + return 0; + + if (slots < msg_slots) + return -EMSGSIZE; + + rets = mei_cl_send_connect(cl, cb); + if (rets) + list_move_tail(&cb->list, &cmpl_list->list); + + return rets; +} + +/** * mei_cl_connect - connect host client to the me one * * @cl: host client + * @me_cl: me client * @file: pointer to file structure * * Locking: called under "dev->device_lock" lock * * Return: 0 on success, <0 on failure. */ -int mei_cl_connect(struct mei_cl *cl, struct file *file) +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + struct file *file) { struct mei_device *dev; struct mei_cl_cb *cb; int rets; - if (WARN_ON(!cl || !cl->dev)) + if (WARN_ON(!cl || !cl->dev || !me_cl)) return -ENODEV; dev = cl->dev; + rets = mei_cl_set_connecting(cl, me_cl); + if (rets) + return rets; + + if (mei_cl_is_fixed_address(cl)) { + cl->state = MEI_FILE_CONNECTED; + return 0; + } + rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { pm_runtime_put_noidle(dev->dev); cl_err(dev, cl, "rpm: get failed %d\n", rets); - return rets; + goto nortpm; } cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file); @@ -855,45 +1066,52 @@ int mei_cl_connect(struct mei_cl *cl, struct file *file) if (rets) goto out; + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + /* run hbuf acquire last so we don't have to undo */ if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) { - cl->state = MEI_FILE_CONNECTING; - if (mei_hbm_cl_connect_req(dev, cl)) { - rets = -ENODEV; + rets = mei_cl_send_connect(cl, cb); + if (rets) goto out; - } - cl->timer_count = MEI_CONNECT_TIMEOUT; - list_add_tail(&cb->list, &dev->ctrl_rd_list.list); - } else { - cl->state = MEI_FILE_INITIALIZING; - list_add_tail(&cb->list, &dev->ctrl_wr_list.list); } mutex_unlock(&dev->device_lock); wait_event_timeout(cl->wait, (cl->state == MEI_FILE_CONNECTED || - cl->state == MEI_FILE_DISCONNECTED), + cl->state == MEI_FILE_DISCONNECT_REQUIRED || + cl->state == MEI_FILE_DISCONNECT_REPLY), mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); mutex_lock(&dev->device_lock); if (!mei_cl_is_connected(cl)) { - cl->state = MEI_FILE_DISCONNECTED; - /* something went really wrong */ + if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) { + mei_io_list_flush(&dev->ctrl_rd_list, cl); + mei_io_list_flush(&dev->ctrl_wr_list, cl); + /* ignore disconnect return valuue; + * in case of failure reset will be invoked + */ + __mei_cl_disconnect(cl); + rets = -EFAULT; + goto out; + } + + /* timeout or something went really wrong */ if (!cl->status) cl->status = -EFAULT; - - mei_io_list_flush(&dev->ctrl_rd_list, cl); - mei_io_list_flush(&dev->ctrl_wr_list, cl); } rets = cl->status; - out: cl_dbg(dev, cl, "rpm: autosuspend\n"); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); mei_io_cb_free(cb); + +nortpm: + if (!mei_cl_is_connected(cl)) + mei_cl_set_disconnected(cl); + return rets; } @@ -934,36 +1152,29 @@ err: * @cl: private data of the file object * * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise. - * -ENOENT if mei_cl is not present - * -EINVAL if single_recv_buf == 0 */ int mei_cl_flow_ctrl_creds(struct mei_cl *cl) { - struct mei_device *dev; - struct mei_me_client *me_cl; - int rets = 0; + int rets; - if (WARN_ON(!cl || !cl->dev)) + if (WARN_ON(!cl || !cl->me_cl)) return -EINVAL; - dev = cl->dev; - if (cl->mei_flow_ctrl_creds > 0) return 1; - me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); - if (!me_cl) { - cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); - return -ENOENT; + if (mei_cl_is_fixed_address(cl)) { + rets = mei_cl_read_start(cl, mei_cl_mtu(cl), NULL); + if (rets && rets != -EBUSY) + return rets; + return 1; } - if (me_cl->mei_flow_ctrl_creds > 0) { - rets = 1; - if (WARN_ON(me_cl->props.single_recv_buf == 0)) - rets = -EINVAL; + if (mei_cl_is_single_recv_buf(cl)) { + if (cl->me_cl->mei_flow_ctrl_creds > 0) + return 1; } - mei_me_cl_put(me_cl); - return rets; + return 0; } /** @@ -973,46 +1184,244 @@ int mei_cl_flow_ctrl_creds(struct mei_cl *cl) * * Return: * 0 on success - * -ENOENT when me client is not found * -EINVAL when ctrl credits are <= 0 */ int mei_cl_flow_ctrl_reduce(struct mei_cl *cl) { + if (WARN_ON(!cl || !cl->me_cl)) + return -EINVAL; + + if (mei_cl_is_fixed_address(cl)) + return 0; + + if (mei_cl_is_single_recv_buf(cl)) { + if (WARN_ON(cl->me_cl->mei_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->me_cl->mei_flow_ctrl_creds--; + } else { + if (WARN_ON(cl->mei_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->mei_flow_ctrl_creds--; + } + return 0; +} + +/** + * mei_cl_notify_fop2req - convert fop to proper request + * + * @fop: client notification start response command + * + * Return: MEI_HBM_NOTIFICATION_START/STOP + */ +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop) +{ + if (fop == MEI_FOP_NOTIFY_START) + return MEI_HBM_NOTIFICATION_START; + else + return MEI_HBM_NOTIFICATION_STOP; +} + +/** + * mei_cl_notify_req2fop - convert notification request top file operation type + * + * @req: hbm notification request type + * + * Return: MEI_FOP_NOTIFY_START/STOP + */ +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req) +{ + if (req == MEI_HBM_NOTIFICATION_START) + return MEI_FOP_NOTIFY_START; + else + return MEI_FOP_NOTIFY_STOP; +} + +/** + * mei_cl_irq_notify - send notification request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + bool request; + + msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + + if (slots < msg_slots) + return -EMSGSIZE; + + request = mei_cl_notify_fop2req(cb->fop_type); + ret = mei_hbm_cl_notify_req(dev, cl, request); + if (ret) { + cl->status = ret; + list_move_tail(&cb->list, &cmpl_list->list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list.list); + return 0; +} + +/** + * mei_cl_notify_request - send notification stop/start request + * + * @cl: host client + * @file: associate request with file + * @request: 1 for start or 0 for stop + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request) +{ struct mei_device *dev; - struct mei_me_client *me_cl; + struct mei_cl_cb *cb; + enum mei_cb_file_ops fop_type; int rets; if (WARN_ON(!cl || !cl->dev)) - return -EINVAL; + return -ENODEV; dev = cl->dev; - me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); - if (!me_cl) { - cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); - return -ENOENT; + if (!dev->hbm_f_ev_supported) { + cl_dbg(dev, cl, "notifications not supported\n"); + return -EOPNOTSUPP; } - if (me_cl->props.single_recv_buf) { - if (WARN_ON(me_cl->mei_flow_ctrl_creds <= 0)) { - rets = -EINVAL; + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + fop_type = mei_cl_notify_req2fop(request); + cb = mei_io_cb_init(cl, fop_type, file); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_notify_req(dev, cl, request)) { + rets = -ENODEV; goto out; } - me_cl->mei_flow_ctrl_creds--; + list_add_tail(&cb->list, &dev->ctrl_rd_list.list); } else { - if (WARN_ON(cl->mei_flow_ctrl_creds <= 0)) { - rets = -EINVAL; - goto out; - } - cl->mei_flow_ctrl_creds--; + list_add_tail(&cb->list, &dev->ctrl_wr_list.list); + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, cl->notify_en == request, + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (cl->notify_en != request) { + mei_io_list_flush(&dev->ctrl_rd_list, cl); + mei_io_list_flush(&dev->ctrl_wr_list, cl); + if (!cl->status) + cl->status = -EFAULT; } - rets = 0; + + rets = cl->status; + out: - mei_me_cl_put(me_cl); + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); return rets; } /** + * mei_cl_notify - raise notification + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + */ +void mei_cl_notify(struct mei_cl *cl) +{ + struct mei_device *dev; + + if (!cl || !cl->dev) + return; + + dev = cl->dev; + + if (!cl->notify_en) + return; + + cl_dbg(dev, cl, "notify event"); + cl->notify_ev = true; + wake_up_interruptible_all(&cl->ev_wait); + + if (cl->ev_async) + kill_fasync(&cl->ev_async, SIGIO, POLL_PRI); + + mei_cl_bus_notify_event(cl); +} + +/** + * mei_cl_notify_get - get or wait for notification event + * + * @cl: host client + * @block: this request is blocking + * @notify_ev: true if notification event was received + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev) +{ + struct mei_device *dev; + int rets; + + *notify_ev = false; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + if (cl->notify_ev) + goto out; + + if (!block) + return -EAGAIN; + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev); + mutex_lock(&dev->device_lock); + + if (rets < 0) + return rets; + +out: + *notify_ev = cl->notify_ev; + cl->notify_ev = false; + return 0; +} + +/** * mei_cl_read_start - the start read client message function. * * @cl: host client @@ -1025,7 +1434,6 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp) { struct mei_device *dev; struct mei_cl_cb *cb; - struct mei_me_client *me_cl; int rets; if (WARN_ON(!cl || !cl->dev)) @@ -1040,27 +1448,29 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp) if (!list_empty(&cl->rd_pending)) return -EBUSY; - me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); - if (!me_cl) { - cl_err(dev, cl, "no such me client %d\n", cl->me_client_id); + if (!mei_me_cl_is_active(cl->me_cl)) { + cl_err(dev, cl, "no such me client\n"); return -ENOTTY; } + /* always allocate at least client max message */ - length = max_t(size_t, length, me_cl->props.max_msg_length); - mei_me_cl_put(me_cl); + length = max_t(size_t, length, mei_cl_mtu(cl)); + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp); + if (!cb) + return -ENOMEM; + + if (mei_cl_is_fixed_address(cl)) { + list_add_tail(&cb->list, &cl->rd_pending); + return 0; + } rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { pm_runtime_put_noidle(dev->dev); cl_err(dev, cl, "rpm: get failed %d\n", rets); - return rets; + goto nortpm; } - cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp); - rets = cb ? 0 : -ENOMEM; - if (rets) - goto out; - if (mei_hbuf_acquire(dev)) { rets = mei_hbm_cl_flow_control_req(dev, cl); if (rets < 0) @@ -1068,6 +1478,7 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp) list_add_tail(&cb->list, &cl->rd_pending); } else { + rets = 0; list_add_tail(&cb->list, &dev->ctrl_wr_list.list); } @@ -1075,7 +1486,7 @@ out: cl_dbg(dev, cl, "rpm: autosuspend\n"); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); - +nortpm: if (rets) mei_io_cb_free(cb); @@ -1102,6 +1513,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, u32 msg_slots; int slots; int rets; + bool first_chunk; if (WARN_ON(!cl || !cl->dev)) return -ENODEV; @@ -1110,7 +1522,9 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, buf = &cb->buf; - rets = mei_cl_flow_ctrl_creds(cl); + first_chunk = cb->buf_idx == 0; + + rets = first_chunk ? mei_cl_flow_ctrl_creds(cl) : 1; if (rets < 0) return rets; @@ -1123,8 +1537,8 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, len = buf->size - cb->buf_idx; msg_slots = mei_data2slots(len); - mei_hdr.host_addr = cl->host_client_id; - mei_hdr.me_addr = cl->me_client_id; + mei_hdr.host_addr = mei_cl_host_addr(cl); + mei_hdr.me_addr = mei_cl_me_id(cl); mei_hdr.reserved = 0; mei_hdr.internal = cb->internal; @@ -1157,12 +1571,14 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, cb->buf_idx += mei_hdr.length; cb->completed = mei_hdr.msg_complete == 1; - if (mei_hdr.msg_complete) { + if (first_chunk) { if (mei_cl_flow_ctrl_reduce(cl)) return -EIO; - list_move_tail(&cb->list, &dev->write_waiting_list.list); } + if (mei_hdr.msg_complete) + list_move_tail(&cb->list, &dev->write_waiting_list.list); + return 0; } @@ -1181,6 +1597,7 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) struct mei_device *dev; struct mei_msg_data *buf; struct mei_msg_hdr mei_hdr; + int size; int rets; @@ -1192,10 +1609,10 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) dev = cl->dev; - buf = &cb->buf; + size = buf->size; - cl_dbg(dev, cl, "size=%d\n", buf->size); + cl_dbg(dev, cl, "size=%d\n", size); rets = pm_runtime_get(dev->dev); if (rets < 0 && rets != -EINPROGRESS) { @@ -1207,8 +1624,8 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) cb->buf_idx = 0; cl->writing_state = MEI_IDLE; - mei_hdr.host_addr = cl->host_client_id; - mei_hdr.me_addr = cl->me_client_id; + mei_hdr.host_addr = mei_cl_host_addr(cl); + mei_hdr.me_addr = mei_cl_me_id(cl); mei_hdr.reserved = 0; mei_hdr.msg_complete = 0; mei_hdr.internal = cb->internal; @@ -1219,21 +1636,21 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) if (rets == 0) { cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); - rets = buf->size; + rets = size; goto out; } if (!mei_hbuf_acquire(dev)) { cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n"); - rets = buf->size; + rets = size; goto out; } /* Check for a maximum length */ - if (buf->size > mei_hbuf_max_len(dev)) { + if (size > mei_hbuf_max_len(dev)) { mei_hdr.length = mei_hbuf_max_len(dev); mei_hdr.msg_complete = 0; } else { - mei_hdr.length = buf->size; + mei_hdr.length = size; mei_hdr.msg_complete = 1; } @@ -1241,22 +1658,21 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking) if (rets) goto err; + rets = mei_cl_flow_ctrl_reduce(cl); + if (rets) + goto err; + cl->writing_state = MEI_WRITING; cb->buf_idx = mei_hdr.length; cb->completed = mei_hdr.msg_complete == 1; out: - if (mei_hdr.msg_complete) { - rets = mei_cl_flow_ctrl_reduce(cl); - if (rets < 0) - goto err; - + if (mei_hdr.msg_complete) list_add_tail(&cb->list, &dev->write_waiting_list.list); - } else { + else list_add_tail(&cb->list, &dev->write_list.list); - } - + cb = NULL; if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) { mutex_unlock(&dev->device_lock); @@ -1271,7 +1687,7 @@ out: } } - rets = buf->size; + rets = size; err: cl_dbg(dev, cl, "rpm: autosuspend\n"); pm_runtime_mark_last_busy(dev->dev); @@ -1289,20 +1705,38 @@ err: */ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb) { - if (cb->fop_type == MEI_FOP_WRITE) { + struct mei_device *dev = cl->dev; + + switch (cb->fop_type) { + case MEI_FOP_WRITE: mei_io_cb_free(cb); - cb = NULL; cl->writing_state = MEI_WRITE_COMPLETE; - if (waitqueue_active(&cl->tx_wait)) + if (waitqueue_active(&cl->tx_wait)) { wake_up_interruptible(&cl->tx_wait); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + } + break; - } else if (cb->fop_type == MEI_FOP_READ) { + case MEI_FOP_READ: list_add_tail(&cb->list, &cl->rd_completed); if (waitqueue_active(&cl->rx_wait)) wake_up_interruptible_all(&cl->rx_wait); else mei_cl_bus_rx_event(cl); - + break; + + case MEI_FOP_CONNECT: + case MEI_FOP_DISCONNECT: + case MEI_FOP_NOTIFY_STOP: + case MEI_FOP_NOTIFY_START: + if (waitqueue_active(&cl->wait)) + wake_up(&cl->wait); + + break; + default: + BUG_ON(0); } } @@ -1312,16 +1746,12 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb) * * @dev: mei device */ - void mei_cl_all_disconnect(struct mei_device *dev) { struct mei_cl *cl; - list_for_each_entry(cl, &dev->file_list, link) { - cl->state = MEI_FILE_DISCONNECTED; - cl->mei_flow_ctrl_creds = 0; - cl->timer_count = 0; - } + list_for_each_entry(cl, &dev->file_list, link) + mei_cl_set_disconnected(cl); } @@ -1343,6 +1773,12 @@ void mei_cl_all_wakeup(struct mei_device *dev) cl_dbg(dev, cl, "Waking up writing client!\n"); wake_up_interruptible(&cl->tx_wait); } + + /* synchronized under device mutex */ + if (waitqueue_active(&cl->ev_wait)) { + cl_dbg(dev, cl, "Waking up waiting for event clients!\n"); + wake_up_interruptible(&cl->ev_wait); + } } } diff --git a/kernel/drivers/misc/mei/client.h b/kernel/drivers/misc/mei/client.h index 0a39e5d45..04e1aa392 100644 --- a/kernel/drivers/misc/mei/client.h +++ b/kernel/drivers/misc/mei/client.h @@ -44,6 +44,42 @@ void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id); void mei_me_cl_rm_all(struct mei_device *dev); +/** + * mei_me_cl_is_active - check whether me client is active in the fw + * + * @me_cl: me client + * + * Return: true if the me client is active in the firmware + */ +static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl) +{ + return !list_empty_careful(&me_cl->list); +} + +/** + * mei_me_cl_uuid - return me client protocol name (uuid) + * + * @me_cl: me client + * + * Return: me client protocol name + */ +static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl) +{ + return &me_cl->props.protocol_name; +} + +/** + * mei_me_cl_ver - return me client protocol version + * + * @me_cl: me client + * + * Return: me client protocol version + */ +static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) +{ + return me_cl->props.protocol_version; +} + /* * MEI IO Functions */ @@ -94,18 +130,96 @@ int mei_cl_flow_ctrl_reduce(struct mei_cl *cl); /** * mei_cl_is_connected - host client is connected * - * @cl: host clinet + * @cl: host client * - * Return: true if the host clinet is connected + * Return: true if the host client is connected */ static inline bool mei_cl_is_connected(struct mei_cl *cl) { return cl->state == MEI_FILE_CONNECTED; } -bool mei_cl_is_other_connecting(struct mei_cl *cl); +/** + * mei_cl_me_id - me client id + * + * @cl: host client + * + * Return: me client id or 0 if client is not connected + */ +static inline u8 mei_cl_me_id(const struct mei_cl *cl) +{ + return cl->me_cl ? cl->me_cl->client_id : 0; +} + +/** + * mei_cl_mtu - maximal message that client can send and receive + * + * @cl: host client + * + * Return: mtu + */ +static inline size_t mei_cl_mtu(const struct mei_cl *cl) +{ + return cl->me_cl->props.max_msg_length; +} + +/** + * mei_cl_is_fixed_address - check whether the me client uses fixed address + * + * @cl: host client + * + * Return: true if the client is connected and it has fixed me address + */ +static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl) +{ + return cl->me_cl && cl->me_cl->props.fixed_address; +} + +/** + * mei_cl_is_single_recv_buf- check whether the me client + * uses single receiving buffer + * + * @cl: host client + * + * Return: true if single_recv_buf == 1; 0 otherwise + */ +static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl) +{ + return cl->me_cl->props.single_recv_buf; +} + +/** + * mei_cl_uuid - client's uuid + * + * @cl: host client + * + * Return: return uuid of connected me client + */ +static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl) +{ + return mei_me_cl_uuid(cl->me_cl); +} + +/** + * mei_cl_host_addr - client's host address + * + * @cl: host client + * + * Return: 0 for fixed address client, host address for dynamic client + */ +static inline u8 mei_cl_host_addr(const struct mei_cl *cl) +{ + return mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id; +} + int mei_cl_disconnect(struct mei_cl *cl); -int mei_cl_connect(struct mei_cl *cl, struct file *file); +void mei_cl_set_disconnected(struct mei_cl *cl); +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list); +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + struct file *file); +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list); int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp); int mei_cl_irq_read_msg(struct mei_cl *cl, struct mei_msg_hdr *hdr, struct mei_cl_cb *cmpl_list); @@ -117,14 +231,20 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb); void mei_host_client_init(struct work_struct *work); - +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop); +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request); +int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request); +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct mei_cl_cb *cmpl_list); +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev); +void mei_cl_notify(struct mei_cl *cl); void mei_cl_all_disconnect(struct mei_device *dev); void mei_cl_all_wakeup(struct mei_device *dev); void mei_cl_all_write_clear(struct mei_device *dev); #define MEI_CL_FMT "cl:host=%02d me=%02d " -#define MEI_CL_PRM(cl) (cl)->host_client_id, (cl)->me_client_id +#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl) #define cl_dbg(dev, cl, format, arg...) \ dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) diff --git a/kernel/drivers/misc/mei/debugfs.c b/kernel/drivers/misc/mei/debugfs.c index d9cd7e6ee..a138d8a27 100644 --- a/kernel/drivers/misc/mei/debugfs.c +++ b/kernel/drivers/misc/mei/debugfs.c @@ -116,7 +116,7 @@ static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf, pos += scnprintf(buf + pos, bufsz - pos, "%2d|%2d|%4d|%5d|%2d|%2d|\n", - i, cl->me_client_id, cl->host_client_id, cl->state, + i, mei_cl_me_id(cl), cl->host_client_id, cl->state, !list_empty(&cl->rd_completed), cl->writing_state); i++; } @@ -149,6 +149,19 @@ static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf, mei_dev_state_str(dev->dev_state)); pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n", mei_hbm_state_str(dev->hbm_state)); + + if (dev->hbm_state == MEI_HBM_STARTED) { + pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n"); + pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n", + dev->hbm_f_pg_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n", + dev->hbm_f_dc_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n", + dev->hbm_f_dot_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n", + dev->hbm_f_ev_supported); + } + pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n", mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED", mei_pg_state_str(mei_pg_state(dev))); @@ -191,6 +204,8 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name) if (!dir) return -ENOMEM; + dev->dbgfs_dir = dir; + f = debugfs_create_file("meclients", S_IRUSR, dir, dev, &mei_dbgfs_fops_meclients); if (!f) { @@ -200,7 +215,7 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name) f = debugfs_create_file("active", S_IRUSR, dir, dev, &mei_dbgfs_fops_active); if (!f) { - dev_err(dev->dev, "meclients: registration failed\n"); + dev_err(dev->dev, "active: registration failed\n"); goto err; } f = debugfs_create_file("devstate", S_IRUSR, dir, @@ -209,7 +224,12 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name) dev_err(dev->dev, "devstate: registration failed\n"); goto err; } - dev->dbgfs_dir = dir; + f = debugfs_create_bool("allow_fixed_address", S_IRUSR | S_IWUSR, dir, + &dev->allow_fixed_address); + if (!f) { + dev_err(dev->dev, "allow_fixed_address: registration failed\n"); + goto err; + } return 0; err: mei_dbgfs_deregister(dev); diff --git a/kernel/drivers/misc/mei/hbm.c b/kernel/drivers/misc/mei/hbm.c index 58da92565..e7b7aad09 100644 --- a/kernel/drivers/misc/mei/hbm.c +++ b/kernel/drivers/misc/mei/hbm.c @@ -52,6 +52,7 @@ static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status) MEI_CL_CS(ALREADY_STARTED); MEI_CL_CS(OUT_OF_RESOURCES); MEI_CL_CS(MESSAGE_SMALL); + MEI_CL_CS(NOT_ALLOWED); default: return "unknown"; } #undef MEI_CL_CCS @@ -89,6 +90,7 @@ static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status) case MEI_CL_CONN_ALREADY_STARTED: return -EBUSY; case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY; case MEI_CL_CONN_MESSAGE_SMALL: return -EINVAL; + case MEI_CL_CONN_NOT_ALLOWED: return -EBUSY; default: return -EINVAL; } } @@ -150,8 +152,8 @@ void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len) memset(cmd, 0, len); cmd->hbm_cmd = hbm_cmd; - cmd->host_addr = cl->host_client_id; - cmd->me_addr = cl->me_client_id; + cmd->host_addr = mei_cl_host_addr(cl); + cmd->me_addr = mei_cl_me_id(cl); } /** @@ -188,8 +190,8 @@ int mei_hbm_cl_write(struct mei_device *dev, static inline bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd) { - return cl->host_client_id == cmd->host_addr && - cl->me_client_id == cmd->me_addr; + return mei_cl_host_addr(cl) == cmd->host_addr && + mei_cl_me_id(cl) == cmd->me_addr; } /** @@ -279,7 +281,7 @@ int mei_hbm_start_req(struct mei_device *dev) return 0; } -/* +/** * mei_hbm_enum_clients_req - sends enumeration client request message. * * @dev: the device structure @@ -299,6 +301,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev) enum_req = (struct hbm_host_enum_request *)dev->wr_msg.data; memset(enum_req, 0, len); enum_req->hbm_cmd = HOST_ENUM_REQ_CMD; + enum_req->allow_add = dev->hbm_f_dc_supported; ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); if (ret) { @@ -311,7 +314,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev) return 0; } -/* +/** * mei_hbm_me_cl_add - add new me client to the list * * @dev: the device structure @@ -344,6 +347,180 @@ static int mei_hbm_me_cl_add(struct mei_device *dev, } /** + * mei_hbm_add_cl_resp - send response to fw on client add request + * + * @dev: the device structure + * @addr: me address + * @status: response status + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status) +{ + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_add_client_response *resp; + const size_t len = sizeof(struct hbm_add_client_response); + int ret; + + dev_dbg(dev->dev, "adding client response\n"); + + resp = (struct hbm_add_client_response *)dev->wr_msg.data; + + mei_hbm_hdr(mei_hdr, len); + memset(resp, 0, sizeof(struct hbm_add_client_response)); + + resp->hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD; + resp->me_addr = addr; + resp->status = status; + + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) + dev_err(dev->dev, "add client response write failed: ret = %d\n", + ret); + return ret; +} + +/** + * mei_hbm_fw_add_cl_req - request from the fw to add a client + * + * @dev: the device structure + * @req: add client request + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_fw_add_cl_req(struct mei_device *dev, + struct hbm_add_client_request *req) +{ + int ret; + u8 status = MEI_HBMS_SUCCESS; + + BUILD_BUG_ON(sizeof(struct hbm_add_client_request) != + sizeof(struct hbm_props_response)); + + ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req); + if (ret) + status = !MEI_HBMS_SUCCESS; + + return mei_hbm_add_cl_resp(dev, req->me_addr, status); +} + +/** + * mei_hbm_cl_notify_req - send notification request + * + * @dev: the device structure + * @cl: a client to disconnect from + * @start: true for start false for stop + * + * Return: 0 on success and -EIO on write failure + */ +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 start) +{ + + struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr; + struct hbm_notification_request *req; + const size_t len = sizeof(struct hbm_notification_request); + int ret; + + mei_hbm_hdr(mei_hdr, len); + mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, dev->wr_msg.data, len); + + req = (struct hbm_notification_request *)dev->wr_msg.data; + req->start = start; + + ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data); + if (ret) + dev_err(dev->dev, "notify request failed: ret = %d\n", ret); + + return ret; +} + +/** + * notify_res_to_fop - convert notification response to the proper + * notification FOP + * + * @cmd: client notification start response command + * + * Return: MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP; + */ +static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + return mei_cl_notify_req2fop(rs->start); +} + +/** + * mei_hbm_cl_notify_start_res - update the client state according + * notify start response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification start response command + */ +static void mei_hbm_cl_notify_start_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_ALREADY_STARTED) { + cl->notify_en = true; + cl->status = 0; + } else { + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify_stop_res - update the client state according + * notify stop response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification stop response command + */ +static void mei_hbm_cl_notify_stop_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_NOT_STARTED) { + cl->notify_en = false; + cl->status = 0; + } else { + /* TODO: spec is not clear yet about other possible issues */ + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify - signal notification event + * + * @dev: the device structure + * @cmd: notification client message + */ +static void mei_hbm_cl_notify(struct mei_device *dev, + struct mei_hbm_cl_cmd *cmd) +{ + struct mei_cl *cl; + + cl = mei_hbm_cl_find_by_cmd(dev, cmd); + if (cl) + mei_cl_notify(cl); +} + +/** * mei_hbm_prop_req - request property for a single client * * @dev: the device structure @@ -392,7 +569,7 @@ static int mei_hbm_prop_req(struct mei_device *dev) return 0; } -/* +/** * mei_hbm_pg - sends pg command * * @dev: the device structure @@ -572,7 +749,7 @@ static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl, cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status); if (rs->status == MEI_CL_DISCONN_SUCCESS) - cl->state = MEI_FILE_DISCONNECTED; + cl->state = MEI_FILE_DISCONNECT_REPLY; cl->status = 0; } @@ -610,8 +787,11 @@ static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl, if (rs->status == MEI_CL_CONN_SUCCESS) cl->state = MEI_FILE_CONNECTED; - else - cl->state = MEI_FILE_DISCONNECTED; + else { + cl->state = MEI_FILE_DISCONNECT_REPLY; + if (rs->status == MEI_CL_CONN_NOT_FOUND) + mei_me_cl_del(dev, cl->me_cl); + } cl->status = mei_cl_conn_status_to_errno(rs->status); } @@ -654,6 +834,12 @@ static void mei_hbm_cl_res(struct mei_device *dev, case MEI_FOP_DISCONNECT: mei_hbm_cl_disconnect_res(dev, cl, rs); break; + case MEI_FOP_NOTIFY_START: + mei_hbm_cl_notify_start_res(dev, cl, rs); + break; + case MEI_FOP_NOTIFY_STOP: + mei_hbm_cl_notify_stop_res(dev, cl, rs); + break; default: return; } @@ -680,8 +866,8 @@ static int mei_hbm_fw_disconnect_req(struct mei_device *dev, cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req); if (cl) { - cl_dbg(dev, cl, "disconnect request received\n"); - cl->state = MEI_FILE_DISCONNECTED; + cl_dbg(dev, cl, "fw disconnect request received\n"); + cl->state = MEI_FILE_DISCONNECTING; cl->timer_count = 0; cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL); @@ -694,6 +880,79 @@ static int mei_hbm_fw_disconnect_req(struct mei_device *dev, } /** + * mei_hbm_pg_enter_res - PG enter response received + * + * @dev: the device structure. + * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_enter_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_OFF || + dev->pg_event != MEI_PG_EVENT_WAIT) { + dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + + return 0; +} + +/** + * mei_hbm_pg_resume - process with PG resume + * + * @dev: the device structure. + */ +void mei_hbm_pg_resume(struct mei_device *dev) +{ + pm_request_resume(dev->dev); +} +EXPORT_SYMBOL_GPL(mei_hbm_pg_resume); + +/** + * mei_hbm_pg_exit_res - PG exit response received + * + * @dev: the device structure. + * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_exit_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_ON || + (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_IDLE)) { + dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + switch (dev->pg_event) { + case MEI_PG_EVENT_WAIT: + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + break; + case MEI_PG_EVENT_IDLE: + /* + * If the driver is not waiting on this then + * this is HW initiated exit from PG. + * Start runtime pm resume sequence to exit from PG. + */ + dev->pg_event = MEI_PG_EVENT_RECEIVED; + mei_hbm_pg_resume(dev); + break; + default: + WARN(1, "hbm: pg exit response: unexpected pg event = %d\n", + dev->pg_event); + return -EPROTO; + } + + return 0; +} + +/** * mei_hbm_config_features - check what hbm features and commands * are supported by the fw * @@ -709,6 +968,17 @@ static void mei_hbm_config_features(struct mei_device *dev) if (dev->version.major_version == HBM_MAJOR_VERSION_PGI && dev->version.minor_version >= HBM_MINOR_VERSION_PGI) dev->hbm_f_pg_supported = 1; + + if (dev->version.major_version >= HBM_MAJOR_VERSION_DC) + dev->hbm_f_dc_supported = 1; + + /* disconnect on connect timeout instead of link reset */ + if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT) + dev->hbm_f_dot_supported = 1; + + /* Notification Event Support */ + if (dev->version.major_version >= HBM_MAJOR_VERSION_EV) + dev->hbm_f_ev_supported = 1; } /** @@ -740,6 +1010,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) struct hbm_host_version_response *version_res; struct hbm_props_response *props_res; struct hbm_host_enum_response *enum_res; + struct hbm_add_client_request *add_cl_req; + int ret; struct mei_hbm_cl_cmd *cl_cmd; struct hbm_client_connect_request *disconnect_req; @@ -828,24 +1100,17 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) break; case MEI_PG_ISOLATION_ENTRY_RES_CMD: - dev_dbg(dev->dev, "power gate isolation entry response received\n"); - dev->pg_event = MEI_PG_EVENT_RECEIVED; - if (waitqueue_active(&dev->wait_pg)) - wake_up(&dev->wait_pg); + dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n"); + ret = mei_hbm_pg_enter_res(dev); + if (ret) + return ret; break; case MEI_PG_ISOLATION_EXIT_REQ_CMD: - dev_dbg(dev->dev, "power gate isolation exit request received\n"); - dev->pg_event = MEI_PG_EVENT_RECEIVED; - if (waitqueue_active(&dev->wait_pg)) - wake_up(&dev->wait_pg); - else - /* - * If the driver is not waiting on this then - * this is HW initiated exit from PG. - * Start runtime pm resume sequence to exit from PG. - */ - pm_request_resume(dev->dev); + dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n"); + ret = mei_hbm_pg_exit_res(dev); + if (ret) + return ret; break; case HOST_CLIENT_PROPERTIES_RES_CMD: @@ -937,6 +1202,39 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) return -EIO; } break; + + case MEI_HBM_ADD_CLIENT_REQ_CMD: + dev_dbg(dev->dev, "hbm: add client request received\n"); + /* + * after the host receives the enum_resp + * message clients may be added or removed + */ + if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS || + dev->hbm_state >= MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + add_cl_req = (struct hbm_add_client_request *)mei_msg; + ret = mei_hbm_fw_add_cl_req(dev, add_cl_req); + if (ret) { + dev_err(dev->dev, "hbm: add client: failed to send response %d\n", + ret); + return -EIO; + } + dev_dbg(dev->dev, "hbm: add client request processed\n"); + break; + + case MEI_HBM_NOTIFY_RES_CMD: + dev_dbg(dev->dev, "hbm: notify response received\n"); + mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd)); + break; + + case MEI_HBM_NOTIFICATION_CMD: + dev_dbg(dev->dev, "hbm: notification\n"); + mei_hbm_cl_notify(dev, cl_cmd); + break; + default: BUG(); break; diff --git a/kernel/drivers/misc/mei/hbm.h b/kernel/drivers/misc/mei/hbm.h index 2544db7d1..a2025a508 100644 --- a/kernel/drivers/misc/mei/hbm.h +++ b/kernel/drivers/misc/mei/hbm.h @@ -54,6 +54,9 @@ int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl); int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl); bool mei_hbm_version_is_supported(struct mei_device *dev); int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd); +void mei_hbm_pg_resume(struct mei_device *dev); +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 request); #endif /* _MEI_HBM_H_ */ diff --git a/kernel/drivers/misc/mei/hw-me-regs.h b/kernel/drivers/misc/mei/hw-me-regs.h index 9eb7ed70a..a8a68acd3 100644 --- a/kernel/drivers/misc/mei/hw-me-regs.h +++ b/kernel/drivers/misc/mei/hw-me-regs.h @@ -117,12 +117,17 @@ #define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */ #define MEI_DEV_ID_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */ +#define MEI_DEV_ID_SPT 0x9D3A /* Sunrise Point */ +#define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */ +#define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ +#define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ /* * MEI HW Section */ /* Host Firmware Status Registers in PCI Config Space */ #define PCI_CFG_HFS_1 0x40 +# define PCI_CFG_HFS_1_D0I3_MSK 0x80000000 #define PCI_CFG_HFS_2 0x48 #define PCI_CFG_HFS_3 0x60 #define PCI_CFG_HFS_4 0x64 @@ -140,7 +145,8 @@ #define ME_CSR_HA 0xC /* H_HGC_CSR - PGI register */ #define H_HPG_CSR 0x10 - +/* H_D0I3C - D0I3 Control */ +#define H_D0I3C 0x800 /* register bits of H_CSR (Host Control Status register) */ /* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */ @@ -159,7 +165,14 @@ #define H_IS 0x00000002 /* Host Interrupt Enable */ #define H_IE 0x00000001 +/* Host D0I3 Interrupt Enable */ +#define H_D0I3C_IE 0x00000020 +/* Host D0I3 Interrupt Status */ +#define H_D0I3C_IS 0x00000040 +/* H_CSR masks */ +#define H_CSR_IE_MASK (H_IE | H_D0I3C_IE) +#define H_CSR_IS_MASK (H_IS | H_D0I3C_IS) /* register bits of ME_CSR_HA (ME Control Status Host Access register) */ /* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only @@ -183,8 +196,14 @@ access to ME_CBD */ #define ME_IE_HRA 0x00000001 -/* register bits - H_HPG_CSR */ -#define H_HPG_CSR_PGIHEXR 0x00000001 -#define H_HPG_CSR_PGI 0x00000002 +/* H_HPG_CSR register bits */ +#define H_HPG_CSR_PGIHEXR 0x00000001 +#define H_HPG_CSR_PGI 0x00000002 + +/* H_D0I3C register bits */ +#define H_D0I3C_CIP 0x00000001 +#define H_D0I3C_IR 0x00000002 +#define H_D0I3C_I3 0x00000004 +#define H_D0I3C_RR 0x00000008 #endif /* _MEI_HW_MEI_REGS_H_ */ diff --git a/kernel/drivers/misc/mei/hw-me.c b/kernel/drivers/misc/mei/hw-me.c index 43d7101ff..25b1997a6 100644 --- a/kernel/drivers/misc/mei/hw-me.c +++ b/kernel/drivers/misc/mei/hw-me.c @@ -134,11 +134,40 @@ static inline void mei_hcsr_write(struct mei_device *dev, u32 reg) */ static inline void mei_hcsr_set(struct mei_device *dev, u32 reg) { - reg &= ~H_IS; + reg &= ~H_CSR_IS_MASK; mei_hcsr_write(dev, reg); } /** + * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register + * + * @dev: the device structure + * + * Return: H_D0I3C register value (u32) + */ +static inline u32 mei_me_d0i3c_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C); + trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg); + + return reg; +} + +/** + * mei_me_d0i3c_write - writes H_D0I3C register to device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg); + mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg); +} + +/** * mei_me_fw_status - read fw status register from pci config space * * @dev: mei device @@ -176,12 +205,25 @@ static int mei_me_fw_status(struct mei_device *dev, */ static void mei_me_hw_config(struct mei_device *dev) { + struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); - u32 hcsr = mei_hcsr_read(dev); + u32 hcsr, reg; + /* Doesn't change in runtime */ + hcsr = mei_hcsr_read(dev); dev->hbuf_depth = (hcsr & H_CBD) >> 24; + reg = 0; + pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + hw->d0i3_supported = + ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); + hw->pg_state = MEI_PG_OFF; + if (hw->d0i3_supported) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) + hw->pg_state = MEI_PG_ON; + } } /** @@ -208,7 +250,7 @@ static void mei_me_intr_clear(struct mei_device *dev) { u32 hcsr = mei_hcsr_read(dev); - if ((hcsr & H_IS) == H_IS) + if (hcsr & H_CSR_IS_MASK) mei_hcsr_write(dev, hcsr); } /** @@ -220,7 +262,7 @@ static void mei_me_intr_enable(struct mei_device *dev) { u32 hcsr = mei_hcsr_read(dev); - hcsr |= H_IE; + hcsr |= H_CSR_IE_MASK; mei_hcsr_set(dev, hcsr); } @@ -233,7 +275,7 @@ static void mei_me_intr_disable(struct mei_device *dev) { u32 hcsr = mei_hcsr_read(dev); - hcsr &= ~H_IE; + hcsr &= ~H_CSR_IE_MASK; mei_hcsr_set(dev, hcsr); } @@ -253,57 +295,6 @@ static void mei_me_hw_reset_release(struct mei_device *dev) /* complete this write before we set host ready on another CPU */ mmiowb(); } -/** - * mei_me_hw_reset - resets fw via mei csr register. - * - * @dev: the device structure - * @intr_enable: if interrupt should be enabled after reset. - * - * Return: always 0 - */ -static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) -{ - u32 hcsr = mei_hcsr_read(dev); - - /* H_RST may be found lit before reset is started, - * for example if preceding reset flow hasn't completed. - * In that case asserting H_RST will be ignored, therefore - * we need to clean H_RST bit to start a successful reset sequence. - */ - if ((hcsr & H_RST) == H_RST) { - dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr); - hcsr &= ~H_RST; - mei_hcsr_set(dev, hcsr); - hcsr = mei_hcsr_read(dev); - } - - hcsr |= H_RST | H_IG | H_IS; - - if (intr_enable) - hcsr |= H_IE; - else - hcsr &= ~H_IE; - - dev->recvd_hw_ready = false; - mei_hcsr_write(dev, hcsr); - - /* - * Host reads the H_CSR once to ensure that the - * posted write to H_CSR completes. - */ - hcsr = mei_hcsr_read(dev); - - if ((hcsr & H_RST) == 0) - dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr); - - if ((hcsr & H_RDY) == H_RDY) - dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr); - - if (intr_enable == false) - mei_me_hw_reset_release(dev); - - return 0; -} /** * mei_me_host_set_ready - enable device @@ -314,7 +305,7 @@ static void mei_me_host_set_ready(struct mei_device *dev) { u32 hcsr = mei_hcsr_read(dev); - hcsr |= H_IE | H_IG | H_RDY; + hcsr |= H_CSR_IE_MASK | H_IG | H_RDY; mei_hcsr_set(dev, hcsr); } @@ -601,13 +592,13 @@ static void mei_me_pg_unset(struct mei_device *dev) } /** - * mei_me_pg_enter_sync - perform pg entry procedure + * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure * * @dev: the device structure * * Return: 0 on success an error code otherwise */ -int mei_me_pg_enter_sync(struct mei_device *dev) +static int mei_me_pg_legacy_enter_sync(struct mei_device *dev) { struct mei_me_hw *hw = to_me_hw(dev); unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); @@ -638,13 +629,13 @@ int mei_me_pg_enter_sync(struct mei_device *dev) } /** - * mei_me_pg_exit_sync - perform pg exit procedure + * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure * * @dev: the device structure * * Return: 0 on success an error code otherwise */ -int mei_me_pg_exit_sync(struct mei_device *dev) +static int mei_me_pg_legacy_exit_sync(struct mei_device *dev) { struct mei_me_hw *hw = to_me_hw(dev); unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); @@ -712,8 +703,12 @@ static bool mei_me_pg_in_transition(struct mei_device *dev) */ static bool mei_me_pg_is_enabled(struct mei_device *dev) { + struct mei_me_hw *hw = to_me_hw(dev); u32 reg = mei_me_mecsr_read(dev); + if (hw->d0i3_supported) + return true; + if ((reg & ME_PGIC_HRA) == 0) goto notsupported; @@ -723,7 +718,8 @@ static bool mei_me_pg_is_enabled(struct mei_device *dev) return true; notsupported: - dev_dbg(dev->dev, "pg: not supported: HGP = %d hbm version %d.%d ?= %d.%d\n", + dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n", + hw->d0i3_supported, !!(reg & ME_PGIC_HRA), dev->version.major_version, dev->version.minor_version, @@ -734,11 +730,211 @@ notsupported: } /** - * mei_me_pg_intr - perform pg processing in interrupt thread handler + * mei_me_d0i3_set - write d0i3 register bit on mei device. * * @dev: the device structure + * @intr: ask for interrupt + * + * Return: D0I3C register value */ -static void mei_me_pg_intr(struct mei_device *dev) +static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg |= H_D0I3C_I3; + if (intr) + reg |= H_D0I3C_IR; + else + reg &= ~H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_unset - clean d0i3 register bit on mei device. + * + * @dev: the device structure + * + * Return: D0I3C register value + */ +static u32 mei_me_d0i3_unset(struct mei_device *dev) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg &= ~H_D0I3C_I3; + reg |= H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_enter_sync - perform d0i3 entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long d0i3_timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT); + unsigned long pgi_timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); + int ret; + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 set not needed\n"); + ret = 0; + goto on; + } + + /* PGI entry procedure */ + dev->pg_event = MEI_PG_EVENT_WAIT; + + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD); + if (ret) + /* FIXME: should we reset here? */ + goto out; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, pgi_timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + /* end PGI entry procedure */ + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3_set(dev, true); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 enter wait not needed\n"); + ret = 0; + goto on; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, d0i3_timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +on: + hw->pg_state = MEI_PG_ON; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret); + return ret; +} + +/** + * mei_me_d0i3_enter - perform d0i3 entry procedure + * no hbm PG handshake + * no waiting for confirmation; runs with interrupts + * disabled + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "already d0i3 : set not needed\n"); + goto on; + } + + mei_me_d0i3_set(dev, false); +on: + hw->pg_state = MEI_PG_ON; + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter\n"); + return 0; +} + +/** + * mei_me_d0i3_exit_sync - perform d0i3 exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT); + int ret; + u32 reg; + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + /* we are not in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 exit not needed\n"); + ret = 0; + goto off; + } + + reg = mei_me_d0i3_unset(dev); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 exit wait not needed\n"); + ret = 0; + goto off; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +off: + hw->pg_state = MEI_PG_OFF; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + + dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret); + return ret; +} + +/** + * mei_me_pg_legacy_intr - perform legacy pg processing + * in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_legacy_intr(struct mei_device *dev) { struct mei_me_hw *hw = to_me_hw(dev); @@ -752,6 +948,162 @@ static void mei_me_pg_intr(struct mei_device *dev) } /** + * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_d0i3_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT && + (hw->intr_source & H_D0I3C_IS)) { + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + if (hw->pg_state == MEI_PG_ON) { + hw->pg_state = MEI_PG_OFF; + if (dev->hbm_state != MEI_HBM_IDLE) { + /* + * force H_RDY because it could be + * wiped off during PG + */ + dev_dbg(dev->dev, "d0i3 set host ready\n"); + mei_me_host_set_ready(dev); + } + } else { + hw->pg_state = MEI_PG_ON; + } + + wake_up(&dev->wait_pg); + } + + if (hw->pg_state == MEI_PG_ON && (hw->intr_source & H_IS)) { + /* + * HW sent some data and we are in D0i3, so + * we got here because of HW initiated exit from D0i3. + * Start runtime pm resume sequence to exit low power state. + */ + dev_dbg(dev->dev, "d0i3 want resume\n"); + mei_hbm_pg_resume(dev); + } +} + +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + mei_me_d0i3_intr(dev); + else + mei_me_pg_legacy_intr(dev); +} + +/** + * mei_me_pg_enter_sync - perform runtime pm entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_enter_sync(dev); + else + return mei_me_pg_legacy_enter_sync(dev); +} + +/** + * mei_me_pg_exit_sync - perform runtime pm exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_exit_sync(dev); + else + return mei_me_pg_legacy_exit_sync(dev); +} + +/** + * mei_me_hw_reset - resets fw via mei csr register. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + u32 hcsr; + + if (intr_enable) { + mei_me_intr_enable(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_exit_sync(dev); + if (ret) + return ret; + } + } + + hcsr = mei_hcsr_read(dev); + /* H_RST may be found lit before reset is started, + * for example if preceding reset flow hasn't completed. + * In that case asserting H_RST will be ignored, therefore + * we need to clean H_RST bit to start a successful reset sequence. + */ + if ((hcsr & H_RST) == H_RST) { + dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr); + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + hcsr = mei_hcsr_read(dev); + } + + hcsr |= H_RST | H_IG | H_CSR_IS_MASK; + + if (!intr_enable) + hcsr &= ~H_CSR_IE_MASK; + + dev->recvd_hw_ready = false; + mei_hcsr_write(dev, hcsr); + + /* + * Host reads the H_CSR once to ensure that the + * posted write to H_CSR completes. + */ + hcsr = mei_hcsr_read(dev); + + if ((hcsr & H_RST) == 0) + dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr); + + if ((hcsr & H_RDY) == H_RDY) + dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr); + + if (!intr_enable) { + mei_me_hw_reset_release(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_enter(dev); + if (ret) + return ret; + } + } + return 0; +} + +/** * mei_me_irq_quick_handler - The ISR of the MEI device * * @irq: The irq number @@ -759,16 +1111,20 @@ static void mei_me_pg_intr(struct mei_device *dev) * * Return: irqreturn_t */ - irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id) { - struct mei_device *dev = (struct mei_device *) dev_id; - u32 hcsr = mei_hcsr_read(dev); + struct mei_device *dev = (struct mei_device *)dev_id; + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr; - if ((hcsr & H_IS) != H_IS) + hcsr = mei_hcsr_read(dev); + if (!(hcsr & H_CSR_IS_MASK)) return IRQ_NONE; - /* clear H_IS bit in H_CSR */ + hw->intr_source = hcsr & H_CSR_IS_MASK; + dev_dbg(dev->dev, "interrupt source 0x%08X.\n", hw->intr_source); + + /* clear H_IS and H_D0I3C_IS bits in H_CSR to clear the interrupts */ mei_hcsr_write(dev, hcsr); return IRQ_WAKE_THREAD; @@ -796,11 +1152,6 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) mutex_lock(&dev->device_lock); mei_io_list_init(&complete_list); - /* Ack the interrupt here - * In case of MSI we don't go through the quick handler */ - if (pci_dev_msi_enabled(to_pci_dev(dev->dev))) - mei_clear_interrupts(dev); - /* check if ME wants a reset */ if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) { dev_warn(dev->dev, "FW not ready: resetting.\n"); diff --git a/kernel/drivers/misc/mei/hw-me.h b/kernel/drivers/misc/mei/hw-me.h index 6022d52af..2ee14dc1b 100644 --- a/kernel/drivers/misc/mei/hw-me.h +++ b/kernel/drivers/misc/mei/hw-me.h @@ -50,13 +50,17 @@ struct mei_cfg { * struct mei_me_hw - me hw specific data * * @cfg: per device generation config and ops - * @mem_addr: io memory address - * @pg_state: power gating state + * @mem_addr: io memory address + * @intr_source: interrupt source + * @pg_state: power gating state + * @d0i3_supported: di03 support */ struct mei_me_hw { const struct mei_cfg *cfg; void __iomem *mem_addr; + u32 intr_source; enum mei_pg_state pg_state; + bool d0i3_supported; }; #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) diff --git a/kernel/drivers/misc/mei/hw.h b/kernel/drivers/misc/mei/hw.h index 16fef6dc4..4cebde859 100644 --- a/kernel/drivers/misc/mei/hw.h +++ b/kernel/drivers/misc/mei/hw.h @@ -31,14 +31,15 @@ #define MEI_IAMTHIF_STALL_TIMER 12 /* HPS */ #define MEI_IAMTHIF_READ_TIMER 10 /* HPS */ -#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */ -#define MEI_HBM_TIMEOUT 1 /* 1 second */ +#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */ +#define MEI_D0I3_TIMEOUT 5 /* D0i3 set/unset max response time */ +#define MEI_HBM_TIMEOUT 1 /* 1 second */ /* * MEI Version */ -#define HBM_MINOR_VERSION 1 -#define HBM_MAJOR_VERSION 1 +#define HBM_MINOR_VERSION 0 +#define HBM_MAJOR_VERSION 2 /* * MEI version with PGI support @@ -46,6 +47,24 @@ #define HBM_MINOR_VERSION_PGI 1 #define HBM_MAJOR_VERSION_PGI 1 +/* + * MEI version with Dynamic clients support + */ +#define HBM_MINOR_VERSION_DC 0 +#define HBM_MAJOR_VERSION_DC 2 + +/* + * MEI version with disconnect on connection timeout support + */ +#define HBM_MINOR_VERSION_DOT 0 +#define HBM_MAJOR_VERSION_DOT 2 + +/* + * MEI version with notifcation support + */ +#define HBM_MINOR_VERSION_EV 0 +#define HBM_MAJOR_VERSION_EV 2 + /* Host bus message command opcode */ #define MEI_HBM_CMD_OP_MSK 0x7f /* Host bus message command RESPONSE */ @@ -81,6 +100,13 @@ #define MEI_PG_ISOLATION_EXIT_REQ_CMD 0x0b #define MEI_PG_ISOLATION_EXIT_RES_CMD 0x8b +#define MEI_HBM_ADD_CLIENT_REQ_CMD 0x0f +#define MEI_HBM_ADD_CLIENT_RES_CMD 0x8f + +#define MEI_HBM_NOTIFY_REQ_CMD 0x10 +#define MEI_HBM_NOTIFY_RES_CMD 0x90 +#define MEI_HBM_NOTIFICATION_CMD 0x11 + /* * MEI Stop Reason * used by hbm_host_stop_request.reason @@ -136,6 +162,7 @@ enum mei_cl_connect_status { MEI_CL_CONN_ALREADY_STARTED = MEI_HBMS_ALREADY_EXISTS, MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED, MEI_CL_CONN_MESSAGE_SMALL = MEI_HBMS_INVALID_PARAMETER, + MEI_CL_CONN_NOT_ALLOWED = MEI_HBMS_NOT_ALLOWED, }; /* @@ -213,9 +240,17 @@ struct hbm_me_stop_request { u8 reserved[2]; } __packed; +/** + * struct hbm_host_enum_request - enumeration request from host to fw + * + * @hbm_cmd: bus message command header + * @allow_add: allow dynamic clients add HBM version >= 2.0 + * @reserved: reserved + */ struct hbm_host_enum_request { u8 hbm_cmd; - u8 reserved[3]; + u8 allow_add; + u8 reserved[2]; } __packed; struct hbm_host_enum_response { @@ -248,6 +283,38 @@ struct hbm_props_response { } __packed; /** + * struct hbm_add_client_request - request to add a client + * might be sent by fw after enumeration has already completed + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @reserved: reserved + * @client_properties: client properties + */ +struct hbm_add_client_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_add_client_response - response to add a client + * sent by the host to report client addition status to fw + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @status: if HBMS_SUCCESS then the client can now accept connections. + * @reserved: reserved + */ +struct hbm_add_client_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved[1]; +} __packed; + +/** * struct hbm_power_gate - power gate request/response * * @hbm_cmd: bus message command header @@ -298,5 +365,62 @@ struct hbm_flow_control { u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH]; } __packed; +#define MEI_HBM_NOTIFICATION_START 1 +#define MEI_HBM_NOTIFICATION_STOP 0 +/** + * struct hbm_notification_request - start/stop notification request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @start: start = 1 or stop = 0 asynchronous notifications + */ +struct hbm_notification_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 start; +} __packed; + +/** + * struct hbm_notification_response - start/stop notification response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: - address of the client in the driver + * @status: (mei_hbm_status) response status for the request + * - MEI_HBMS_SUCCESS: successful stop/start + * - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found. + * - MEI_HBMS_ALREADY_STARTED: for start requests for a previously + * started notification. + * - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom + * asynchronous notifications are currently disabled. + * + * @start: start = 1 or stop = 0 asynchronous notifications + * @reserved: reserved + */ +struct hbm_notification_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; + u8 start; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_notification - notification event + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved for alignment + */ +struct hbm_notification { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved[1]; +} __packed; #endif diff --git a/kernel/drivers/misc/mei/init.c b/kernel/drivers/misc/mei/init.c index 97353cf8d..3edafc8d3 100644 --- a/kernel/drivers/misc/mei/init.c +++ b/kernel/drivers/misc/mei/init.c @@ -329,12 +329,10 @@ void mei_stop(struct mei_device *dev) { dev_dbg(dev->dev, "stopping the device.\n"); - mei_cancel_work(dev); - - mei_nfc_host_exit(dev); - mei_cl_bus_remove_devices(dev); + mei_cancel_work(dev); + mutex_lock(&dev->device_lock); mei_wd_stop(dev); @@ -361,13 +359,15 @@ bool mei_write_is_idle(struct mei_device *dev) { bool idle = (dev->dev_state == MEI_DEV_ENABLED && list_empty(&dev->ctrl_wr_list.list) && - list_empty(&dev->write_list.list)); + list_empty(&dev->write_list.list) && + list_empty(&dev->write_waiting_list.list)); - dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%d write=%d\n", + dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n", idle, mei_dev_state_str(dev->dev_state), list_empty(&dev->ctrl_wr_list.list), - list_empty(&dev->write_list.list)); + list_empty(&dev->write_list.list), + list_empty(&dev->write_waiting_list.list)); return idle; } @@ -390,6 +390,7 @@ void mei_device_init(struct mei_device *dev, INIT_LIST_HEAD(&dev->me_clients); mutex_init(&dev->device_lock); init_rwsem(&dev->me_clients_rwsem); + mutex_init(&dev->cl_bus_lock); init_waitqueue_head(&dev->wait_hw_ready); init_waitqueue_head(&dev->wait_pg); init_waitqueue_head(&dev->wait_hbm_start); diff --git a/kernel/drivers/misc/mei/interrupt.c b/kernel/drivers/misc/mei/interrupt.c index 3f84d2edc..64b568a02 100644 --- a/kernel/drivers/misc/mei/interrupt.c +++ b/kernel/drivers/misc/mei/interrupt.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/jiffies.h> #include <linux/slab.h> +#include <linux/pm_runtime.h> #include <linux/mei.h> @@ -65,8 +66,8 @@ EXPORT_SYMBOL_GPL(mei_irq_compl_handler); static inline int mei_cl_hbm_equal(struct mei_cl *cl, struct mei_msg_hdr *mei_hdr) { - return cl->host_client_id == mei_hdr->host_addr && - cl->me_client_id == mei_hdr->me_addr; + return mei_cl_host_addr(cl) == mei_hdr->host_addr && + mei_cl_me_id(cl) == mei_hdr->me_addr; } /** @@ -147,6 +148,9 @@ int mei_cl_irq_read_msg(struct mei_cl *cl, cb->read_time = jiffies; cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx); list_move_tail(&cb->list, &complete_list->list); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); } out: @@ -180,56 +184,14 @@ static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb, return -EMSGSIZE; ret = mei_hbm_cl_disconnect_rsp(dev, cl); - - cl->state = MEI_FILE_DISCONNECTED; - cl->status = 0; + mei_cl_set_disconnected(cl); mei_io_cb_free(cb); + mei_me_cl_put(cl->me_cl); + cl->me_cl = NULL; return ret; } - - -/** - * mei_cl_irq_disconnect - processes close related operation from - * interrupt thread context - send disconnect request - * - * @cl: client - * @cb: callback block. - * @cmpl_list: complete list. - * - * Return: 0, OK; otherwise, error. - */ -static int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, - struct mei_cl_cb *cmpl_list) -{ - struct mei_device *dev = cl->dev; - u32 msg_slots; - int slots; - - msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); - slots = mei_hbuf_empty_slots(dev); - - if (slots < msg_slots) - return -EMSGSIZE; - - if (mei_hbm_cl_disconnect_req(dev, cl)) { - cl->status = 0; - cb->buf_idx = 0; - list_move_tail(&cb->list, &cmpl_list->list); - return -EIO; - } - - cl->state = MEI_FILE_DISCONNECTING; - cl->status = 0; - cb->buf_idx = 0; - list_move_tail(&cb->list, &dev->ctrl_rd_list.list); - cl->timer_count = MEI_CONNECT_TIMEOUT; - - return 0; -} - - /** * mei_cl_irq_read - processes client read related operation from the * interrupt thread context - request for flow control credits @@ -267,49 +229,6 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb, return 0; } - -/** - * mei_cl_irq_connect - send connect request in irq_thread context - * - * @cl: client - * @cb: callback block. - * @cmpl_list: complete list. - * - * Return: 0, OK; otherwise, error. - */ -static int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, - struct mei_cl_cb *cmpl_list) -{ - struct mei_device *dev = cl->dev; - u32 msg_slots; - int slots; - int ret; - - msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request)); - slots = mei_hbuf_empty_slots(dev); - - if (mei_cl_is_other_connecting(cl)) - return 0; - - if (slots < msg_slots) - return -EMSGSIZE; - - cl->state = MEI_FILE_CONNECTING; - - ret = mei_hbm_cl_connect_req(dev, cl); - if (ret) { - cl->status = ret; - cb->buf_idx = 0; - list_del_init(&cb->list); - return ret; - } - - list_move_tail(&cb->list, &dev->ctrl_rd_list.list); - cl->timer_count = MEI_CONNECT_TIMEOUT; - return 0; -} - - /** * mei_irq_read_handler - bottom half read routine after ISR to * handle the read processing. @@ -488,6 +407,13 @@ int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list) if (ret) return ret; break; + + case MEI_FOP_NOTIFY_START: + case MEI_FOP_NOTIFY_STOP: + ret = mei_cl_irq_notify(cl, cb, cmpl_list); + if (ret) + return ret; + break; default: BUG(); } @@ -509,6 +435,24 @@ int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list) EXPORT_SYMBOL_GPL(mei_irq_write_handler); +/** + * mei_connect_timeout - connect/disconnect timeouts + * + * @cl: host client + */ +static void mei_connect_timeout(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_CONNECTING) { + if (dev->hbm_f_dot_supported) { + cl->state = MEI_FILE_DISCONNECT_REQUIRED; + wake_up(&cl->wait); + return; + } + } + mei_reset(dev); +} /** * mei_timer - timer function. @@ -549,7 +493,7 @@ void mei_timer(struct work_struct *work) if (cl->timer_count) { if (--cl->timer_count == 0) { dev_err(dev->dev, "timer: connect/disconnect timeout.\n"); - mei_reset(dev); + mei_connect_timeout(cl); goto out; } } diff --git a/kernel/drivers/misc/mei/main.c b/kernel/drivers/misc/mei/main.c index e40bcd03b..80f9afcb1 100644 --- a/kernel/drivers/misc/mei/main.c +++ b/kernel/drivers/misc/mei/main.c @@ -94,7 +94,7 @@ static int mei_release(struct inode *inode, struct file *file) { struct mei_cl *cl = file->private_data; struct mei_device *dev; - int rets = 0; + int rets; if (WARN_ON(!cl || !cl->dev)) return -ENODEV; @@ -106,11 +106,8 @@ static int mei_release(struct inode *inode, struct file *file) rets = mei_amthif_release(dev, file); goto out; } - if (mei_cl_is_connected(cl)) { - cl->state = MEI_FILE_DISCONNECTING; - cl_dbg(dev, cl, "disconnecting\n"); - rets = mei_cl_disconnect(cl); - } + rets = mei_cl_disconnect(cl); + mei_cl_flush_queues(cl, file); cl_dbg(dev, cl, "removing\n"); @@ -186,8 +183,7 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, err = mei_cl_read_start(cl, length, file); if (err && err != -EBUSY) { - dev_dbg(dev->dev, - "mei start read failure with status = %d\n", err); + cl_dbg(dev, cl, "mei start read failure status = %d\n", err); rets = err; goto out; } @@ -218,6 +214,11 @@ static ssize_t mei_read(struct file *file, char __user *ubuf, cb = mei_cl_read_cb(cl, file); if (!cb) { + if (mei_cl_is_fixed_address(cl) && dev->allow_fixed_address) { + cb = mei_cl_read_cb(cl, NULL); + if (cb) + goto copy_buffer; + } rets = 0; goto out; } @@ -226,11 +227,11 @@ copy_buffer: /* now copy the data to user space */ if (cb->status) { rets = cb->status; - dev_dbg(dev->dev, "read operation failed %d\n", rets); + cl_dbg(dev, cl, "read operation failed %d\n", rets); goto free; } - dev_dbg(dev->dev, "buf.size = %d buf.idx= %ld\n", + cl_dbg(dev, cl, "buf.size = %d buf.idx = %ld\n", cb->buf.size, cb->buf_idx); if (length == 0 || ubuf == NULL || *offset > cb->buf_idx) { rets = -EMSGSIZE; @@ -256,7 +257,7 @@ free: mei_io_cb_free(cb); out: - dev_dbg(dev->dev, "end mei read rets= %d\n", rets); + cl_dbg(dev, cl, "end mei read rets = %d\n", rets); mutex_unlock(&dev->device_lock); return rets; } @@ -274,7 +275,6 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, size_t length, loff_t *offset) { struct mei_cl *cl = file->private_data; - struct mei_me_client *me_cl = NULL; struct mei_cl_cb *write_cb = NULL; struct mei_device *dev; unsigned long timeout = 0; @@ -292,27 +292,27 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, goto out; } - me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id); - if (!me_cl) { - rets = -ENOTTY; + if (!mei_cl_is_connected(cl)) { + cl_err(dev, cl, "is not connected"); + rets = -ENODEV; goto out; } - if (length == 0) { - rets = 0; + if (!mei_me_cl_is_active(cl->me_cl)) { + rets = -ENOTTY; goto out; } - if (length > me_cl->props.max_msg_length) { + if (length > mei_cl_mtu(cl)) { rets = -EFBIG; goto out; } - if (!mei_cl_is_connected(cl)) { - cl_err(dev, cl, "is not connected"); - rets = -ENODEV; + if (length == 0) { + rets = 0; goto out; } + if (cl == &dev->iamthif_cl) { write_cb = mei_amthif_find_read_list_entry(dev, file); @@ -350,14 +350,12 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf, "amthif write failed with status = %d\n", rets); goto out; } - mei_me_cl_put(me_cl); mutex_unlock(&dev->device_lock); return length; } rets = mei_cl_write(cl, write_cb, false); out: - mei_me_cl_put(me_cl); mutex_unlock(&dev->device_lock); if (rets < 0) mei_io_cb_free(write_cb); @@ -395,17 +393,16 @@ static int mei_ioctl_connect_client(struct file *file, /* find ME client we're trying to connect to */ me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid); - if (!me_cl || me_cl->props.fixed_address) { + if (!me_cl || + (me_cl->props.fixed_address && !dev->allow_fixed_address)) { dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n", - &data->in_client_uuid); + &data->in_client_uuid); + mei_me_cl_put(me_cl); return -ENOTTY; } - cl->me_client_id = me_cl->client_id; - cl->cl_uuid = me_cl->props.protocol_name; - dev_dbg(dev->dev, "Connect to FW Client ID = %d\n", - cl->me_client_id); + me_cl->client_id); dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n", me_cl->props.protocol_version); dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n", @@ -441,7 +438,7 @@ static int mei_ioctl_connect_client(struct file *file, client->protocol_version = me_cl->props.protocol_version; dev_dbg(dev->dev, "Can connect?\n"); - rets = mei_cl_connect(cl, file); + rets = mei_cl_connect(cl, me_cl, file); end: mei_me_cl_put(me_cl); @@ -449,6 +446,49 @@ end: } /** + * mei_ioctl_client_notify_request - + * propagate event notification request to client + * + * @file: pointer to file structure + * @request: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_request(struct file *file, u32 request) +{ + struct mei_cl *cl = file->private_data; + + if (request != MEI_HBM_NOTIFICATION_START && + request != MEI_HBM_NOTIFICATION_STOP) + return -EINVAL; + + return mei_cl_notify_request(cl, file, (u8)request); +} + +/** + * mei_ioctl_client_notify_get - wait for notification request + * + * @file: pointer to file structure + * @notify_get: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_get(struct file *file, u32 *notify_get) +{ + struct mei_cl *cl = file->private_data; + bool notify_ev; + bool block = (file->f_flags & O_NONBLOCK) == 0; + int rets; + + rets = mei_cl_notify_get(cl, block, ¬ify_ev); + if (rets) + return rets; + + *notify_get = notify_ev ? 1 : 0; + return 0; +} + +/** * mei_ioctl - the IOCTL function * * @file: pointer to file structure @@ -462,6 +502,7 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data) struct mei_device *dev; struct mei_cl *cl = file->private_data; struct mei_connect_client_data connect_data; + u32 notify_get, notify_req; int rets; @@ -502,6 +543,33 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data) break; + case IOCTL_MEI_NOTIFY_SET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n"); + if (copy_from_user(¬ify_req, + (char __user *)data, sizeof(notify_req))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + rets = mei_ioctl_client_notify_request(file, notify_req); + break; + + case IOCTL_MEI_NOTIFY_GET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n"); + rets = mei_ioctl_client_notify_get(file, ¬ify_get); + if (rets) + goto out; + + dev_dbg(dev->dev, "copy connect data to user\n"); + if (copy_to_user((char __user *)data, + ¬ify_get, sizeof(notify_get))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + + } + break; + default: dev_err(dev->dev, ": unsupported ioctl %d.\n", cmd); rets = -ENOIOCTLCMD; @@ -544,6 +612,7 @@ static unsigned int mei_poll(struct file *file, poll_table *wait) struct mei_cl *cl = file->private_data; struct mei_device *dev; unsigned int mask = 0; + bool notify_en; if (WARN_ON(!cl || !cl->dev)) return POLLERR; @@ -552,6 +621,7 @@ static unsigned int mei_poll(struct file *file, poll_table *wait) mutex_lock(&dev->device_lock); + notify_en = cl->notify_en && (req_events & POLLPRI); if (dev->dev_state != MEI_DEV_ENABLED || !mei_cl_is_connected(cl)) { @@ -564,6 +634,12 @@ static unsigned int mei_poll(struct file *file, poll_table *wait) goto out; } + if (notify_en) { + poll_wait(file, &cl->ev_wait, wait); + if (cl->notify_ev) + mask |= POLLPRI; + } + if (req_events & (POLLIN | POLLRDNORM)) { poll_wait(file, &cl->rx_wait, wait); @@ -579,6 +655,28 @@ out: } /** + * mei_fasync - asynchronous io support + * + * @fd: file descriptor + * @file: pointer to file structure + * @band: band bitmap + * + * Return: negative on error, + * 0 if it did no changes, + * and positive a process was added or deleted + */ +static int mei_fasync(int fd, struct file *file, int band) +{ + + struct mei_cl *cl = file->private_data; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + return fasync_helper(fd, file, band, &cl->ev_async); +} + +/** * fw_status_show - mei device attribute show method * * @device: device pointer @@ -630,6 +728,7 @@ static const struct file_operations mei_fops = { .release = mei_release, .write = mei_write, .poll = mei_poll, + .fasync = mei_fasync, .llseek = no_llseek }; diff --git a/kernel/drivers/misc/mei/mei_dev.h b/kernel/drivers/misc/mei/mei_dev.h index f84c39ee2..4250555d5 100644 --- a/kernel/drivers/misc/mei/mei_dev.h +++ b/kernel/drivers/misc/mei/mei_dev.h @@ -88,7 +88,9 @@ enum file_state { MEI_FILE_CONNECTING, MEI_FILE_CONNECTED, MEI_FILE_DISCONNECTING, - MEI_FILE_DISCONNECTED + MEI_FILE_DISCONNECT_REPLY, + MEI_FILE_DISCONNECT_REQUIRED, + MEI_FILE_DISCONNECTED, }; /* MEI device states */ @@ -134,6 +136,8 @@ enum mei_wd_states { * @MEI_FOP_CONNECT: connect * @MEI_FOP_DISCONNECT: disconnect * @MEI_FOP_DISCONNECT_RSP: disconnect response + * @MEI_FOP_NOTIFY_START: start notification + * @MEI_FOP_NOTIFY_STOP: stop notification */ enum mei_cb_file_ops { MEI_FOP_READ = 0, @@ -141,6 +145,8 @@ enum mei_cb_file_ops { MEI_FOP_CONNECT, MEI_FOP_DISCONNECT, MEI_FOP_DISCONNECT_RSP, + MEI_FOP_NOTIFY_START, + MEI_FOP_NOTIFY_STOP, }; /* @@ -176,6 +182,8 @@ struct mei_fw_status { * @props: client properties * @client_id: me client id * @mei_flow_ctrl_creds: flow control credits + * @connect_count: number connections to this client + * @bus_added: added to bus */ struct mei_me_client { struct list_head list; @@ -183,6 +191,8 @@ struct mei_me_client { struct mei_client_properties props; u8 client_id; u8 mei_flow_ctrl_creds; + u8 connect_count; + u8 bus_added; }; @@ -225,18 +235,21 @@ struct mei_cl_cb { * @tx_wait: wait queue for tx completion * @rx_wait: wait queue for rx completion * @wait: wait queue for management operation + * @ev_wait: notification wait queue + * @ev_async: event async notification * @status: connection status - * @cl_uuid: client uuid name + * @me_cl: fw client connected * @host_client_id: host id - * @me_client_id: me/fw id * @mei_flow_ctrl_creds: transmit flow credentials * @timer_count: watchdog timer for operation completion + * @reserved: reserved for alignment + * @notify_en: notification - enabled/disabled + * @notify_ev: pending notification event * @writing_state: state of the tx * @rd_pending: pending read credits * @rd_completed: completed read * - * @device: device on the mei client bus - * @device_link: link to bus clients + * @cldev: device on the mei client bus */ struct mei_cl { struct list_head link; @@ -245,47 +258,50 @@ struct mei_cl { wait_queue_head_t tx_wait; wait_queue_head_t rx_wait; wait_queue_head_t wait; + wait_queue_head_t ev_wait; + struct fasync_struct *ev_async; int status; - uuid_le cl_uuid; + struct mei_me_client *me_cl; u8 host_client_id; - u8 me_client_id; u8 mei_flow_ctrl_creds; u8 timer_count; + u8 reserved; + u8 notify_en; + u8 notify_ev; enum mei_file_transaction_states writing_state; struct list_head rd_pending; struct list_head rd_completed; - /* MEI CL bus data */ - struct mei_cl_device *device; - struct list_head device_link; + struct mei_cl_device *cldev; }; -/** struct mei_hw_ops +/** + * struct mei_hw_ops - hw specific ops * * @host_is_ready : query for host readiness - + * * @hw_is_ready : query if hw is ready * @hw_reset : reset hw * @hw_start : start hw after reset * @hw_config : configure hw - + * * @fw_status : get fw status registers * @pg_state : power gating state of the device * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled - + * * @intr_clear : clear pending interrupts * @intr_enable : enable interrupts * @intr_disable : disable interrupts - + * * @hbuf_free_slots : query for write buffer empty slots * @hbuf_is_ready : query if write buffer is empty * @hbuf_max_len : query for write buffer max len - + * * @write : write a message to FW - + * * @rdbuf_full_slots : query how many slots are filled - + * * @read_hdr : get first 4 bytes (header) * @read : read a buffer from the FW */ @@ -324,75 +340,16 @@ struct mei_hw_ops { }; /* MEI bus API*/ - -/** - * struct mei_cl_ops - MEI CL device ops - * This structure allows ME host clients to implement technology - * specific operations. - * - * @enable: Enable an MEI CL device. Some devices require specific - * HECI commands to initialize completely. - * @disable: Disable an MEI CL device. - * @send: Tx hook for the device. This allows ME host clients to trap - * the device driver buffers before actually physically - * pushing it to the ME. - * @recv: Rx hook for the device. This allows ME host clients to trap the - * ME buffers before forwarding them to the device driver. - */ -struct mei_cl_ops { - int (*enable)(struct mei_cl_device *device); - int (*disable)(struct mei_cl_device *device); - int (*send)(struct mei_cl_device *device, u8 *buf, size_t length); - int (*recv)(struct mei_cl_device *device, u8 *buf, size_t length); -}; - -struct mei_cl_device *mei_cl_add_device(struct mei_device *dev, - uuid_le uuid, char *name, - struct mei_cl_ops *ops); -void mei_cl_remove_device(struct mei_cl_device *device); - -ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length); -ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length); +void mei_cl_bus_rescan(struct mei_device *bus); +void mei_cl_bus_dev_fixup(struct mei_cl_device *dev); +ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, + bool blocking); ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); void mei_cl_bus_rx_event(struct mei_cl *cl); -void mei_cl_bus_remove_devices(struct mei_device *dev); +void mei_cl_bus_notify_event(struct mei_cl *cl); +void mei_cl_bus_remove_devices(struct mei_device *bus); int mei_cl_bus_init(void); void mei_cl_bus_exit(void); -struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev, uuid_le uuid); - - -/** - * struct mei_cl_device - MEI device handle - * An mei_cl_device pointer is returned from mei_add_device() - * and links MEI bus clients to their actual ME host client pointer. - * Drivers for MEI devices will get an mei_cl_device pointer - * when being probed and shall use it for doing ME bus I/O. - * - * @dev: linux driver model device pointer - * @cl: mei client - * @ops: ME transport ops - * @event_work: async work to execute event callback - * @event_cb: Drivers register this callback to get asynchronous ME - * events (e.g. Rx buffer pending) notifications. - * @event_context: event callback run context - * @events: Events bitmask sent to the driver. - * @priv_data: client private data - */ -struct mei_cl_device { - struct device dev; - - struct mei_cl *cl; - - const struct mei_cl_ops *ops; - - struct work_struct event_work; - mei_cl_event_cb_t event_cb; - void *event_context; - unsigned long events; - - void *priv_data; -}; - /** * enum mei_pg_event - power gating transition events @@ -465,7 +422,10 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @wr_msg : the buffer for hbm control messages * * @version : HBM protocol version in use - * @hbm_f_pg_supported : hbm feature pgi protocol + * @hbm_f_pg_supported : hbm feature pgi protocol + * @hbm_f_dc_supported : hbm feature dynamic clients + * @hbm_f_dot_supported : hbm feature disconnect on timeout + * @hbm_f_ev_supported : hbm feature event notification * * @me_clients_rwsem: rw lock over me_clients list * @me_clients : list of FW clients @@ -473,6 +433,8 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @host_clients_map : host clients id pool * @me_client_index : last FW client index in enumeration * + * @allow_fixed_address: allow user space to connect a fixed client + * * @wd_cl : watchdog client * @wd_state : watchdog client state * @wd_pending : watchdog command is pending @@ -485,7 +447,6 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @iamthif_cl : amthif host client * @iamthif_current_cb : amthif current operation callback * @iamthif_open_count : number of opened amthif connections - * @iamthif_mtu : amthif client max message length * @iamthif_timer : time stamp of current amthif command completion * @iamthif_stall_timer : timer to detect amthif hang * @iamthif_state : amthif processor state @@ -495,6 +456,7 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @reset_work : work item for the device reset * * @device_list : mei client bus list + * @cl_bus_lock : client bus list lock * * @dbgfs_dir : debugfs mei root directory * @@ -557,6 +519,9 @@ struct mei_device { struct hbm_version version; unsigned int hbm_f_pg_supported:1; + unsigned int hbm_f_dc_supported:1; + unsigned int hbm_f_dot_supported:1; + unsigned int hbm_f_ev_supported:1; struct rw_semaphore me_clients_rwsem; struct list_head me_clients; @@ -564,6 +529,8 @@ struct mei_device { DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); unsigned long me_client_index; + bool allow_fixed_address; + struct mei_cl wd_cl; enum mei_wd_states wd_state; bool wd_pending; @@ -579,7 +546,6 @@ struct mei_device { struct mei_cl iamthif_cl; struct mei_cl_cb *iamthif_current_cb; long iamthif_open_count; - int iamthif_mtu; unsigned long iamthif_timer; u32 iamthif_stall_timer; enum iamthif_states iamthif_state; @@ -590,6 +556,7 @@ struct mei_device { /* List of bus devices */ struct list_head device_list; + struct mutex cl_bus_lock; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; @@ -658,7 +625,7 @@ void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list); */ void mei_amthif_reset_params(struct mei_device *dev); -int mei_amthif_host_init(struct mei_device *dev); +int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl); int mei_amthif_read(struct mei_device *dev, struct file *file, char __user *ubuf, size_t length, loff_t *offset); @@ -685,7 +652,7 @@ int mei_amthif_irq_read(struct mei_device *dev, s32 *slots); /* * NFC functions */ -int mei_nfc_host_init(struct mei_device *dev); +int mei_nfc_host_init(struct mei_device *dev, struct mei_me_client *me_cl); void mei_nfc_host_exit(struct mei_device *dev); /* @@ -695,7 +662,7 @@ extern const uuid_le mei_nfc_guid; int mei_wd_send(struct mei_device *dev); int mei_wd_stop(struct mei_device *dev); -int mei_wd_host_init(struct mei_device *dev); +int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl); /* * mei_watchdog_register - Registering watchdog interface * once we got connection to the WD Client diff --git a/kernel/drivers/misc/mei/nfc.c b/kernel/drivers/misc/mei/nfc.c deleted file mode 100644 index c3bcb6368..000000000 --- a/kernel/drivers/misc/mei/nfc.c +++ /dev/null @@ -1,593 +0,0 @@ -/* - * - * Intel Management Engine Interface (Intel MEI) Linux driver - * Copyright (c) 2003-2013, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/device.h> -#include <linux/slab.h> - -#include <linux/mei_cl_bus.h> - -#include "mei_dev.h" -#include "client.h" - -struct mei_nfc_cmd { - u8 command; - u8 status; - u16 req_id; - u32 reserved; - u16 data_size; - u8 sub_command; - u8 data[]; -} __packed; - -struct mei_nfc_reply { - u8 command; - u8 status; - u16 req_id; - u32 reserved; - u16 data_size; - u8 sub_command; - u8 reply_status; - u8 data[]; -} __packed; - -struct mei_nfc_if_version { - u8 radio_version_sw[3]; - u8 reserved[3]; - u8 radio_version_hw[3]; - u8 i2c_addr; - u8 fw_ivn; - u8 vendor_id; - u8 radio_type; -} __packed; - -struct mei_nfc_connect { - u8 fw_ivn; - u8 vendor_id; -} __packed; - -struct mei_nfc_connect_resp { - u8 fw_ivn; - u8 vendor_id; - u16 me_major; - u16 me_minor; - u16 me_hotfix; - u16 me_build; -} __packed; - -struct mei_nfc_hci_hdr { - u8 cmd; - u8 status; - u16 req_id; - u32 reserved; - u16 data_size; -} __packed; - -#define MEI_NFC_CMD_MAINTENANCE 0x00 -#define MEI_NFC_CMD_HCI_SEND 0x01 -#define MEI_NFC_CMD_HCI_RECV 0x02 - -#define MEI_NFC_SUBCMD_CONNECT 0x00 -#define MEI_NFC_SUBCMD_IF_VERSION 0x01 - -#define MEI_NFC_HEADER_SIZE 10 - -/** - * struct mei_nfc_dev - NFC mei device - * - * @cl: NFC host client - * @cl_info: NFC info host client - * @init_work: perform connection to the info client - * @send_wq: send completion wait queue - * @fw_ivn: NFC Interface Version Number - * @vendor_id: NFC manufacturer ID - * @radio_type: NFC radio type - * @bus_name: bus name - * - * @req_id: message counter - * @recv_req_id: reception message counter - */ -struct mei_nfc_dev { - struct mei_cl *cl; - struct mei_cl *cl_info; - struct work_struct init_work; - wait_queue_head_t send_wq; - u8 fw_ivn; - u8 vendor_id; - u8 radio_type; - char *bus_name; - - u16 req_id; - u16 recv_req_id; -}; - -/* UUIDs for NFC F/W clients */ -const uuid_le mei_nfc_guid = UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, - 0x94, 0xd4, 0x50, 0x26, - 0x67, 0x23, 0x77, 0x5c); - -static const uuid_le mei_nfc_info_guid = UUID_LE(0xd2de1625, 0x382d, 0x417d, - 0x48, 0xa4, 0xef, 0xab, - 0xba, 0x8a, 0x12, 0x06); - -/* Vendors */ -#define MEI_NFC_VENDOR_INSIDE 0x00 -#define MEI_NFC_VENDOR_NXP 0x01 - -/* Radio types */ -#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00 -#define MEI_NFC_VENDOR_NXP_PN544 0x01 - -static void mei_nfc_free(struct mei_nfc_dev *ndev) -{ - if (!ndev) - return; - - if (ndev->cl) { - list_del(&ndev->cl->device_link); - mei_cl_unlink(ndev->cl); - kfree(ndev->cl); - } - - if (ndev->cl_info) { - list_del(&ndev->cl_info->device_link); - mei_cl_unlink(ndev->cl_info); - kfree(ndev->cl_info); - } - - kfree(ndev); -} - -static int mei_nfc_build_bus_name(struct mei_nfc_dev *ndev) -{ - struct mei_device *dev; - - if (!ndev->cl) - return -ENODEV; - - dev = ndev->cl->dev; - - switch (ndev->vendor_id) { - case MEI_NFC_VENDOR_INSIDE: - switch (ndev->radio_type) { - case MEI_NFC_VENDOR_INSIDE_UREAD: - ndev->bus_name = "microread"; - return 0; - - default: - dev_err(dev->dev, "Unknown radio type 0x%x\n", - ndev->radio_type); - - return -EINVAL; - } - - case MEI_NFC_VENDOR_NXP: - switch (ndev->radio_type) { - case MEI_NFC_VENDOR_NXP_PN544: - ndev->bus_name = "pn544"; - return 0; - default: - dev_err(dev->dev, "Unknown radio type 0x%x\n", - ndev->radio_type); - - return -EINVAL; - } - - default: - dev_err(dev->dev, "Unknown vendor ID 0x%x\n", - ndev->vendor_id); - - return -EINVAL; - } - - return 0; -} - -static int mei_nfc_connect(struct mei_nfc_dev *ndev) -{ - struct mei_device *dev; - struct mei_cl *cl; - struct mei_nfc_cmd *cmd, *reply; - struct mei_nfc_connect *connect; - struct mei_nfc_connect_resp *connect_resp; - size_t connect_length, connect_resp_length; - int bytes_recv, ret; - - cl = ndev->cl; - dev = cl->dev; - - connect_length = sizeof(struct mei_nfc_cmd) + - sizeof(struct mei_nfc_connect); - - connect_resp_length = sizeof(struct mei_nfc_cmd) + - sizeof(struct mei_nfc_connect_resp); - - cmd = kzalloc(connect_length, GFP_KERNEL); - if (!cmd) - return -ENOMEM; - connect = (struct mei_nfc_connect *)cmd->data; - - reply = kzalloc(connect_resp_length, GFP_KERNEL); - if (!reply) { - kfree(cmd); - return -ENOMEM; - } - - connect_resp = (struct mei_nfc_connect_resp *)reply->data; - - cmd->command = MEI_NFC_CMD_MAINTENANCE; - cmd->data_size = 3; - cmd->sub_command = MEI_NFC_SUBCMD_CONNECT; - connect->fw_ivn = ndev->fw_ivn; - connect->vendor_id = ndev->vendor_id; - - ret = __mei_cl_send(cl, (u8 *)cmd, connect_length); - if (ret < 0) { - dev_err(dev->dev, "Could not send connect cmd\n"); - goto err; - } - - bytes_recv = __mei_cl_recv(cl, (u8 *)reply, connect_resp_length); - if (bytes_recv < 0) { - dev_err(dev->dev, "Could not read connect response\n"); - ret = bytes_recv; - goto err; - } - - dev_info(dev->dev, "IVN 0x%x Vendor ID 0x%x\n", - connect_resp->fw_ivn, connect_resp->vendor_id); - - dev_info(dev->dev, "ME FW %d.%d.%d.%d\n", - connect_resp->me_major, connect_resp->me_minor, - connect_resp->me_hotfix, connect_resp->me_build); - - ret = 0; - -err: - kfree(reply); - kfree(cmd); - - return ret; -} - -static int mei_nfc_if_version(struct mei_nfc_dev *ndev) -{ - struct mei_device *dev; - struct mei_cl *cl; - - struct mei_nfc_cmd cmd; - struct mei_nfc_reply *reply = NULL; - struct mei_nfc_if_version *version; - size_t if_version_length; - int bytes_recv, ret; - - cl = ndev->cl_info; - dev = cl->dev; - - memset(&cmd, 0, sizeof(struct mei_nfc_cmd)); - cmd.command = MEI_NFC_CMD_MAINTENANCE; - cmd.data_size = 1; - cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION; - - ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); - if (ret < 0) { - dev_err(dev->dev, "Could not send IF version cmd\n"); - return ret; - } - - /* to be sure on the stack we alloc memory */ - if_version_length = sizeof(struct mei_nfc_reply) + - sizeof(struct mei_nfc_if_version); - - reply = kzalloc(if_version_length, GFP_KERNEL); - if (!reply) - return -ENOMEM; - - bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length); - if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { - dev_err(dev->dev, "Could not read IF version\n"); - ret = -EIO; - goto err; - } - - version = (struct mei_nfc_if_version *)reply->data; - - ndev->fw_ivn = version->fw_ivn; - ndev->vendor_id = version->vendor_id; - ndev->radio_type = version->radio_type; - -err: - kfree(reply); - return ret; -} - -static int mei_nfc_enable(struct mei_cl_device *cldev) -{ - struct mei_device *dev; - struct mei_nfc_dev *ndev; - int ret; - - ndev = (struct mei_nfc_dev *)cldev->priv_data; - dev = ndev->cl->dev; - - ret = mei_nfc_connect(ndev); - if (ret < 0) { - dev_err(dev->dev, "Could not connect to NFC"); - return ret; - } - - return 0; -} - -static int mei_nfc_disable(struct mei_cl_device *cldev) -{ - return 0; -} - -static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length) -{ - struct mei_device *dev; - struct mei_nfc_dev *ndev; - struct mei_nfc_hci_hdr *hdr; - u8 *mei_buf; - int err; - - ndev = (struct mei_nfc_dev *) cldev->priv_data; - dev = ndev->cl->dev; - - err = -ENOMEM; - mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL); - if (!mei_buf) - goto out; - - hdr = (struct mei_nfc_hci_hdr *) mei_buf; - hdr->cmd = MEI_NFC_CMD_HCI_SEND; - hdr->status = 0; - hdr->req_id = ndev->req_id; - hdr->reserved = 0; - hdr->data_size = length; - - memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); - err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE); - if (err < 0) - goto out; - - if (!wait_event_interruptible_timeout(ndev->send_wq, - ndev->recv_req_id == ndev->req_id, HZ)) { - dev_err(dev->dev, "NFC MEI command timeout\n"); - err = -ETIME; - } else { - ndev->req_id++; - } -out: - kfree(mei_buf); - return err; -} - -static int mei_nfc_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) -{ - struct mei_nfc_dev *ndev; - struct mei_nfc_hci_hdr *hci_hdr; - int received_length; - - ndev = (struct mei_nfc_dev *)cldev->priv_data; - - received_length = __mei_cl_recv(ndev->cl, buf, length); - if (received_length < 0) - return received_length; - - hci_hdr = (struct mei_nfc_hci_hdr *) buf; - - if (hci_hdr->cmd == MEI_NFC_CMD_HCI_SEND) { - ndev->recv_req_id = hci_hdr->req_id; - wake_up(&ndev->send_wq); - - return 0; - } - - return received_length; -} - -static struct mei_cl_ops nfc_ops = { - .enable = mei_nfc_enable, - .disable = mei_nfc_disable, - .send = mei_nfc_send, - .recv = mei_nfc_recv, -}; - -static void mei_nfc_init(struct work_struct *work) -{ - struct mei_device *dev; - struct mei_cl_device *cldev; - struct mei_nfc_dev *ndev; - struct mei_cl *cl_info; - - ndev = container_of(work, struct mei_nfc_dev, init_work); - - cl_info = ndev->cl_info; - dev = cl_info->dev; - - mutex_lock(&dev->device_lock); - - if (mei_cl_connect(cl_info, NULL) < 0) { - mutex_unlock(&dev->device_lock); - dev_err(dev->dev, "Could not connect to the NFC INFO ME client"); - - goto err; - } - - mutex_unlock(&dev->device_lock); - - if (mei_nfc_if_version(ndev) < 0) { - dev_err(dev->dev, "Could not get the NFC interface version"); - - goto err; - } - - dev_info(dev->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n", - ndev->fw_ivn, ndev->vendor_id, ndev->radio_type); - - mutex_lock(&dev->device_lock); - - if (mei_cl_disconnect(cl_info) < 0) { - mutex_unlock(&dev->device_lock); - dev_err(dev->dev, "Could not disconnect the NFC INFO ME client"); - - goto err; - } - - mutex_unlock(&dev->device_lock); - - if (mei_nfc_build_bus_name(ndev) < 0) { - dev_err(dev->dev, "Could not build the bus ID name\n"); - return; - } - - cldev = mei_cl_add_device(dev, mei_nfc_guid, ndev->bus_name, &nfc_ops); - if (!cldev) { - dev_err(dev->dev, "Could not add the NFC device to the MEI bus\n"); - - goto err; - } - - cldev->priv_data = ndev; - - - return; - -err: - mutex_lock(&dev->device_lock); - mei_nfc_free(ndev); - mutex_unlock(&dev->device_lock); - -} - - -int mei_nfc_host_init(struct mei_device *dev) -{ - struct mei_nfc_dev *ndev; - struct mei_cl *cl_info, *cl; - struct mei_me_client *me_cl = NULL; - int ret; - - - /* in case of internal reset bail out - * as the device is already setup - */ - cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid); - if (cl) - return 0; - - ndev = kzalloc(sizeof(struct mei_nfc_dev), GFP_KERNEL); - if (!ndev) { - ret = -ENOMEM; - goto err; - } - - /* check for valid client id */ - me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_info_guid); - if (!me_cl) { - dev_info(dev->dev, "nfc: failed to find the client\n"); - ret = -ENOTTY; - goto err; - } - - cl_info = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY); - if (IS_ERR(cl_info)) { - ret = PTR_ERR(cl_info); - goto err; - } - - cl_info->me_client_id = me_cl->client_id; - cl_info->cl_uuid = me_cl->props.protocol_name; - mei_me_cl_put(me_cl); - me_cl = NULL; - - list_add_tail(&cl_info->device_link, &dev->device_list); - - ndev->cl_info = cl_info; - - /* check for valid client id */ - me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid); - if (!me_cl) { - dev_info(dev->dev, "nfc: failed to find the client\n"); - ret = -ENOTTY; - goto err; - } - - cl = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY); - if (IS_ERR(cl)) { - ret = PTR_ERR(cl); - goto err; - } - - cl->me_client_id = me_cl->client_id; - cl->cl_uuid = me_cl->props.protocol_name; - mei_me_cl_put(me_cl); - me_cl = NULL; - - list_add_tail(&cl->device_link, &dev->device_list); - - ndev->cl = cl; - - ndev->req_id = 1; - - INIT_WORK(&ndev->init_work, mei_nfc_init); - init_waitqueue_head(&ndev->send_wq); - schedule_work(&ndev->init_work); - - return 0; - -err: - mei_me_cl_put(me_cl); - mei_nfc_free(ndev); - - return ret; -} - -void mei_nfc_host_exit(struct mei_device *dev) -{ - struct mei_nfc_dev *ndev; - struct mei_cl *cl; - struct mei_cl_device *cldev; - - cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid); - if (!cl) - return; - - cldev = cl->device; - if (!cldev) - return; - - ndev = (struct mei_nfc_dev *)cldev->priv_data; - if (ndev) - cancel_work_sync(&ndev->init_work); - - cldev->priv_data = NULL; - - mutex_lock(&dev->device_lock); - /* Need to remove the device here - * since mei_nfc_free will unlink the clients - */ - mei_cl_remove_device(cldev); - mei_nfc_free(ndev); - mutex_unlock(&dev->device_lock); -} - - diff --git a/kernel/drivers/misc/mei/pci-me.c b/kernel/drivers/misc/mei/pci-me.c index 23f71f5ce..27678d815 100644 --- a/kernel/drivers/misc/mei/pci-me.c +++ b/kernel/drivers/misc/mei/pci-me.c @@ -82,6 +82,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, mei_me_pch8_cfg)}, {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)}, + /* required last entry */ {0, } }; @@ -128,6 +133,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) const struct mei_cfg *cfg = (struct mei_cfg *)(ent->driver_data); struct mei_device *dev; struct mei_me_hw *hw; + unsigned int irqflags; int err; @@ -180,17 +186,12 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_enable_msi(pdev); /* request and enable interrupt */ - if (pci_dev_msi_enabled(pdev)) - err = request_threaded_irq(pdev->irq, - NULL, - mei_me_irq_thread_handler, - IRQF_ONESHOT, KBUILD_MODNAME, dev); - else - err = request_threaded_irq(pdev->irq, + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + + err = request_threaded_irq(pdev->irq, mei_me_irq_quick_handler, mei_me_irq_thread_handler, - IRQF_SHARED, KBUILD_MODNAME, dev); - + irqflags, KBUILD_MODNAME, dev); if (err) { dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n", pdev->irq); @@ -319,6 +320,7 @@ static int mei_me_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev; + unsigned int irqflags; int err; dev = pci_get_drvdata(pdev); @@ -327,17 +329,13 @@ static int mei_me_pci_resume(struct device *device) pci_enable_msi(pdev); + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + /* request and enable interrupt */ - if (pci_dev_msi_enabled(pdev)) - err = request_threaded_irq(pdev->irq, - NULL, - mei_me_irq_thread_handler, - IRQF_ONESHOT, KBUILD_MODNAME, dev); - else - err = request_threaded_irq(pdev->irq, + err = request_threaded_irq(pdev->irq, mei_me_irq_quick_handler, mei_me_irq_thread_handler, - IRQF_SHARED, KBUILD_MODNAME, dev); + irqflags, KBUILD_MODNAME, dev); if (err) { dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", diff --git a/kernel/drivers/misc/mei/pci-txe.c b/kernel/drivers/misc/mei/pci-txe.c index dcfcba44b..0882c0201 100644 --- a/kernel/drivers/misc/mei/pci-txe.c +++ b/kernel/drivers/misc/mei/pci-txe.c @@ -338,7 +338,7 @@ static int mei_txe_pm_runtime_suspend(struct device *device) * However if device is not wakeable we do not enter * D-low state and we need to keep the interrupt kicking */ - if (!ret && pci_dev_run_wake(pdev)) + if (!ret && pci_dev_run_wake(pdev)) mei_disable_interrupts(dev); dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); diff --git a/kernel/drivers/misc/mei/wd.c b/kernel/drivers/misc/mei/wd.c index 2725f865c..b34663883 100644 --- a/kernel/drivers/misc/mei/wd.c +++ b/kernel/drivers/misc/mei/wd.c @@ -50,15 +50,15 @@ static void mei_wd_set_start_timeout(struct mei_device *dev, u16 timeout) * mei_wd_host_init - connect to the watchdog client * * @dev: the device structure + * @me_cl: me client * * Return: -ENOTTY if wd client cannot be found * -EIO if write has failed * 0 on success */ -int mei_wd_host_init(struct mei_device *dev) +int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl) { struct mei_cl *cl = &dev->wd_cl; - struct mei_me_client *me_cl; int ret; mei_cl_init(cl, dev); @@ -66,27 +66,13 @@ int mei_wd_host_init(struct mei_device *dev) dev->wd_timeout = MEI_WD_DEFAULT_TIMEOUT; dev->wd_state = MEI_WD_IDLE; - - /* check for valid client id */ - me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid); - if (!me_cl) { - dev_info(dev->dev, "wd: failed to find the client\n"); - return -ENOTTY; - } - - cl->me_client_id = me_cl->client_id; - cl->cl_uuid = me_cl->props.protocol_name; - mei_me_cl_put(me_cl); - ret = mei_cl_link(cl, MEI_WD_HOST_CLIENT_ID); - if (ret < 0) { dev_info(dev->dev, "wd: failed link client\n"); return ret; } - ret = mei_cl_connect(cl, NULL); - + ret = mei_cl_connect(cl, me_cl, NULL); if (ret) { dev_err(dev->dev, "wd: failed to connect = %d\n", ret); mei_cl_unlink(cl); @@ -118,7 +104,7 @@ int mei_wd_send(struct mei_device *dev) int ret; hdr.host_addr = cl->host_client_id; - hdr.me_addr = cl->me_client_id; + hdr.me_addr = mei_cl_me_id(cl); hdr.msg_complete = 1; hdr.reserved = 0; hdr.internal = 0; @@ -378,6 +364,7 @@ int mei_watchdog_register(struct mei_device *dev) int ret; + amt_wd_dev.parent = dev->dev; /* unlock to perserve correct locking order */ mutex_unlock(&dev->device_lock); ret = watchdog_register_device(&amt_wd_dev); diff --git a/kernel/drivers/misc/mic/Kconfig b/kernel/drivers/misc/mic/Kconfig index cc4eef040..40677df7f 100644 --- a/kernel/drivers/misc/mic/Kconfig +++ b/kernel/drivers/misc/mic/Kconfig @@ -15,11 +15,28 @@ config INTEL_MIC_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. +comment "SCIF Bus Driver" + +config SCIF_BUS + tristate "SCIF Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST + and CONFIG_INTEL_MIC_CARD. + + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + comment "Intel MIC Host Driver" config INTEL_MIC_HOST tristate "Intel MIC Host Driver" - depends on 64BIT && PCI && X86 && INTEL_MIC_BUS + depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM select VHOST_RING help This enables Host Driver support for the Intel Many Integrated @@ -39,7 +56,7 @@ comment "Intel MIC Card Driver" config INTEL_MIC_CARD tristate "Intel MIC Card Driver" - depends on 64BIT && X86 && INTEL_MIC_BUS + depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM select VIRTIO help This enables card driver support for the Intel Many Integrated @@ -52,3 +69,41 @@ config INTEL_MIC_CARD For more information see <http://software.intel.com/en-us/mic-developer>. + +comment "SCIF Driver" + +config SCIF + tristate "SCIF Driver" + depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT + select IOMMU_IOVA + help + This enables SCIF Driver support for the Intel Many Integrated + Core (MIC) family of PCIe form factor coprocessor devices that + run a 64 bit Linux OS. The Symmetric Communication Interface + (SCIF (pronounced as skiff)) is a low level communications API + across PCIe currently implemented for MIC. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + +comment "Intel MIC Coprocessor State Management (COSM) Drivers" + +config MIC_COSM + tristate "Intel MIC Coprocessor State Management (COSM) Drivers" + depends on 64BIT && PCI && X86 && SCIF + help + This enables COSM driver support for the Intel Many + Integrated Core (MIC) family of PCIe form factor coprocessor + devices. COSM drivers implement functions such as boot, + shutdown, reset and reboot of MIC devices. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. diff --git a/kernel/drivers/misc/mic/Makefile b/kernel/drivers/misc/mic/Makefile index e9bf14875..e288a1106 100644 --- a/kernel/drivers/misc/mic/Makefile +++ b/kernel/drivers/misc/mic/Makefile @@ -4,4 +4,7 @@ # obj-$(CONFIG_INTEL_MIC_HOST) += host/ obj-$(CONFIG_INTEL_MIC_CARD) += card/ -obj-$(CONFIG_INTEL_MIC_BUS) += bus/ +obj-y += bus/ +obj-$(CONFIG_SCIF) += scif/ +obj-$(CONFIG_MIC_COSM) += cosm/ +obj-$(CONFIG_MIC_COSM) += cosm_client/ diff --git a/kernel/drivers/misc/mic/bus/Makefile b/kernel/drivers/misc/mic/bus/Makefile index d85c7f2a0..761842b0d 100644 --- a/kernel/drivers/misc/mic/bus/Makefile +++ b/kernel/drivers/misc/mic/bus/Makefile @@ -3,3 +3,5 @@ # Copyright(c) 2014, Intel Corporation. # obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o +obj-$(CONFIG_SCIF_BUS) += scif_bus.o +obj-$(CONFIG_MIC_COSM) += cosm_bus.o diff --git a/kernel/drivers/misc/mic/bus/cosm_bus.c b/kernel/drivers/misc/mic/bus/cosm_bus.c new file mode 100644 index 000000000..d31d6c6e6 --- /dev/null +++ b/kernel/drivers/misc/mic/bus/cosm_bus.c @@ -0,0 +1,141 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/idr.h> +#include "cosm_bus.h" + +/* Unique numbering for cosm devices. */ +static DEFINE_IDA(cosm_index_ida); + +static int cosm_dev_probe(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + return drv->probe(dev); +} + +static int cosm_dev_remove(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type cosm_bus = { + .name = "cosm_bus", + .probe = cosm_dev_probe, + .remove = cosm_dev_remove, +}; + +int cosm_register_driver(struct cosm_driver *driver) +{ + driver->driver.bus = &cosm_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_register_driver); + +void cosm_unregister_driver(struct cosm_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_unregister_driver); + +static inline void cosm_release_dev(struct device *d) +{ + struct cosm_device *cdev = dev_to_cosm(d); + + kfree(cdev); +} + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops) +{ + struct cosm_device *cdev; + int ret; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return ERR_PTR(-ENOMEM); + + cdev->dev.parent = pdev; + cdev->dev.release = cosm_release_dev; + cdev->hw_ops = hw_ops; + dev_set_drvdata(&cdev->dev, cdev); + cdev->dev.bus = &cosm_bus; + + /* Assign a unique device index and hence name */ + ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + goto free_cdev; + + cdev->index = ret; + cdev->dev.id = ret; + dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index); + + ret = device_register(&cdev->dev); + if (ret) + goto ida_remove; + return cdev; +ida_remove: + ida_simple_remove(&cosm_index_ida, cdev->index); +free_cdev: + put_device(&cdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(cosm_register_device); + +void cosm_unregister_device(struct cosm_device *dev) +{ + int index = dev->index; /* save for after device release */ + + device_unregister(&dev->dev); + ida_simple_remove(&cosm_index_ida, index); +} +EXPORT_SYMBOL_GPL(cosm_unregister_device); + +struct cosm_device *cosm_find_cdev_by_id(int id) +{ + struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL); + + return dev ? container_of(dev, struct cosm_device, dev) : NULL; +} +EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id); + +static int __init cosm_init(void) +{ + return bus_register(&cosm_bus); +} + +static void __exit cosm_exit(void) +{ + bus_unregister(&cosm_bus); + ida_destroy(&cosm_index_ida); +} + +core_initcall(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/bus/cosm_bus.h b/kernel/drivers/misc/mic/bus/cosm_bus.h new file mode 100644 index 000000000..f7c57f266 --- /dev/null +++ b/kernel/drivers/misc/mic/bus/cosm_bus.h @@ -0,0 +1,134 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#ifndef _COSM_BUS_H_ +#define _COSM_BUS_H_ + +#include <linux/scif.h> +#include <linux/mic_common.h> +#include "../common/mic_dev.h" + +/** + * cosm_device - representation of a cosm device + * + * @attr_group: Pointer to list of sysfs attribute groups. + * @sdev: Device for sysfs entries. + * @state: MIC state. + * @shutdown_status: MIC status reported by card for shutdown/crashes. + * @shutdown_status_int: Internal shutdown status maintained by the driver + * @cosm_mutex: Mutex for synchronizing access to data structures. + * @reset_trigger_work: Work for triggering reset requests. + * @scif_work: Work for handling per device SCIF connections + * @cmdline: Kernel command line. + * @firmware: Firmware file name. + * @ramdisk: Ramdisk file name. + * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. + * @log_buf_addr: Log buffer address for MIC. + * @log_buf_len: Log buffer length address for MIC. + * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. + * @hw_ops: the hardware bus ops for this device. + * @dev: underlying device. + * @index: unique position on the cosm bus + * @dbg_dir: debug fs directory + * @newepd: new endpoint from scif accept to be assigned to this cdev + * @epd: SCIF endpoint for this cdev + * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev + * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification + */ +struct cosm_device { + const struct attribute_group **attr_group; + struct device *sdev; + u8 state; + u8 shutdown_status; + u8 shutdown_status_int; + struct mutex cosm_mutex; + struct work_struct reset_trigger_work; + struct work_struct scif_work; + char *cmdline; + char *firmware; + char *ramdisk; + char *bootmode; + void *log_buf_addr; + int *log_buf_len; + struct kernfs_node *state_sysfs; + struct cosm_hw_ops *hw_ops; + struct device dev; + int index; + struct dentry *dbg_dir; + scif_epd_t newepd; + scif_epd_t epd; + bool heartbeat_watchdog_enable; + bool sysfs_heartbeat_enable; +}; + +/** + * cosm_driver - operations for a cosm driver + * + * @driver: underlying device driver (populate name and owner). + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct cosm_driver { + struct device_driver driver; + int (*probe)(struct cosm_device *dev); + void (*remove)(struct cosm_device *dev); +}; + +/** + * cosm_hw_ops - cosm bus ops + * + * @reset: trigger MIC reset + * @force_reset: force MIC reset + * @post_reset: inform MIC reset is complete + * @ready: is MIC ready for OS download + * @start: boot MIC + * @stop: prepare MIC for reset + * @family: return MIC HW family string + * @stepping: return MIC HW stepping string + * @aper: return MIC PCIe aperture + */ +struct cosm_hw_ops { + void (*reset)(struct cosm_device *cdev); + void (*force_reset)(struct cosm_device *cdev); + void (*post_reset)(struct cosm_device *cdev, enum mic_states state); + bool (*ready)(struct cosm_device *cdev); + int (*start)(struct cosm_device *cdev, int id); + void (*stop)(struct cosm_device *cdev, bool force); + ssize_t (*family)(struct cosm_device *cdev, char *buf); + ssize_t (*stepping)(struct cosm_device *cdev, char *buf); + struct mic_mw *(*aper)(struct cosm_device *cdev); +}; + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops); +void cosm_unregister_device(struct cosm_device *dev); +int cosm_register_driver(struct cosm_driver *drv); +void cosm_unregister_driver(struct cosm_driver *drv); +struct cosm_device *cosm_find_cdev_by_id(int id); + +static inline struct cosm_device *dev_to_cosm(struct device *dev) +{ + return container_of(dev, struct cosm_device, dev); +} + +static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv) +{ + return container_of(drv, struct cosm_driver, driver); +} +#endif /* _COSM_BUS_H */ diff --git a/kernel/drivers/misc/mic/bus/mic_bus.c b/kernel/drivers/misc/mic/bus/mic_bus.c index 961ae90aa..be37890ab 100644 --- a/kernel/drivers/misc/mic/bus/mic_bus.c +++ b/kernel/drivers/misc/mic/bus/mic_bus.c @@ -25,9 +25,6 @@ #include <linux/idr.h> #include <linux/mic_bus.h> -/* Unique numbering for mbus devices. */ -static DEFINE_IDA(mbus_index_ida); - static ssize_t device_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d) struct mbus_device * mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va) { int ret; struct mbus_device *mbdev; @@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, mbdev->dev.release = mbus_release_dev; mbdev->hw_ops = hw_ops; mbdev->dev.bus = &mic_bus; - - /* Assign a unique device index and hence name. */ - ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL); - if (ret < 0) - goto free_mbdev; - - mbdev->index = ret; + mbdev->index = index; dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); /* * device_register() causes the bus infrastructure to look for a @@ -180,22 +172,17 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, */ ret = device_register(&mbdev->dev); if (ret) - goto ida_remove; + goto free_mbdev; return mbdev; -ida_remove: - ida_simple_remove(&mbus_index_ida, mbdev->index); free_mbdev: - kfree(mbdev); + put_device(&mbdev->dev); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(mbus_register_device); void mbus_unregister_device(struct mbus_device *mbdev) { - int index = mbdev->index; /* save for after device release */ - device_unregister(&mbdev->dev); - ida_simple_remove(&mbus_index_ida, index); } EXPORT_SYMBOL_GPL(mbus_unregister_device); @@ -207,7 +194,6 @@ static int __init mbus_init(void) static void __exit mbus_exit(void) { bus_unregister(&mic_bus); - ida_destroy(&mbus_index_ida); } core_initcall(mbus_init); diff --git a/kernel/drivers/misc/mic/bus/scif_bus.c b/kernel/drivers/misc/mic/bus/scif_bus.c new file mode 100644 index 000000000..ff6e01c25 --- /dev/null +++ b/kernel/drivers/misc/mic/bus/scif_bus.c @@ -0,0 +1,209 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel Symmetric Communications Interface Bus driver. + */ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/idr.h> +#include <linux/dma-mapping.h> + +#include "scif_bus.h" + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "scif:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *scif_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(scif_dev); + +static inline int scif_id_match(const struct scif_hw_dev *dev, + const struct scif_hw_dev_id *id) +{ + if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID) + return 0; + + return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call scif_dev_probe(). + */ +static int scif_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct scif_hw_dev *dev = dev_to_scif(dv); + const struct scif_hw_dev_id *ids; + + ids = drv_to_scif(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (scif_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int scif_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct scif_hw_dev *dev = dev_to_scif(dv); + + return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int scif_dev_probe(struct device *d) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + struct scif_driver *drv = drv_to_scif(dev->dev.driver); + + return drv->probe(dev); +} + +static int scif_dev_remove(struct device *d) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + struct scif_driver *drv = drv_to_scif(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type scif_bus = { + .name = "scif_bus", + .match = scif_dev_match, + .dev_groups = scif_dev_groups, + .uevent = scif_uevent, + .probe = scif_dev_probe, + .remove = scif_dev_remove, +}; + +int scif_register_driver(struct scif_driver *driver) +{ + driver->driver.bus = &scif_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(scif_register_driver); + +void scif_unregister_driver(struct scif_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(scif_unregister_driver); + +static void scif_release_dev(struct device *d) +{ + struct scif_hw_dev *sdev = dev_to_scif(d); + + kfree(sdev); +} + +struct scif_hw_dev * +scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, + struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, + struct mic_mw *mmio, struct mic_mw *aper, void *dp, + void __iomem *rdp, struct dma_chan **chan, int num_chan, + bool card_rel_da) +{ + int ret; + struct scif_hw_dev *sdev; + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) + return ERR_PTR(-ENOMEM); + + sdev->dev.parent = pdev; + sdev->id.device = id; + sdev->id.vendor = SCIF_DEV_ANY_ID; + sdev->dev.archdata.dma_ops = dma_ops; + sdev->dev.release = scif_release_dev; + sdev->hw_ops = hw_ops; + sdev->dnode = dnode; + sdev->snode = snode; + dev_set_drvdata(&sdev->dev, sdev); + sdev->dev.bus = &scif_bus; + sdev->mmio = mmio; + sdev->aper = aper; + sdev->dp = dp; + sdev->rdp = rdp; + sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask; + dma_set_mask(&sdev->dev, DMA_BIT_MASK(64)); + sdev->dma_ch = chan; + sdev->num_dma_ch = num_chan; + sdev->card_rel_da = card_rel_da; + dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. + */ + ret = device_register(&sdev->dev); + if (ret) + goto free_sdev; + return sdev; +free_sdev: + put_device(&sdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(scif_register_device); + +void scif_unregister_device(struct scif_hw_dev *sdev) +{ + device_unregister(&sdev->dev); +} +EXPORT_SYMBOL_GPL(scif_unregister_device); + +static int __init scif_init(void) +{ + return bus_register(&scif_bus); +} + +static void __exit scif_exit(void) +{ + bus_unregister(&scif_bus); +} + +core_initcall(scif_init); +module_exit(scif_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) SCIF Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/bus/scif_bus.h b/kernel/drivers/misc/mic/bus/scif_bus.h new file mode 100644 index 000000000..94f29ac60 --- /dev/null +++ b/kernel/drivers/misc/mic/bus/scif_bus.h @@ -0,0 +1,133 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel Symmetric Communications Interface Bus driver. + */ +#ifndef _SCIF_BUS_H_ +#define _SCIF_BUS_H_ +/* + * Everything a scif driver needs to work with any particular scif + * hardware abstraction layer. + */ +#include <linux/dma-mapping.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" + +struct scif_hw_dev_id { + u32 device; + u32 vendor; +}; + +#define MIC_SCIF_DEV 1 +#define SCIF_DEV_ANY_ID 0xffffffff + +/** + * scif_hw_dev - representation of a hardware device abstracted for scif + * @hw_ops: the hardware ops supported by this device + * @id: the device type identification (used to match it with a driver) + * @mmio: MMIO memory window + * @aper: Aperture memory window + * @dev: underlying device + * @dnode - The destination node which this device will communicate with. + * @snode - The source node for this device. + * @dp - Self device page + * @rdp - Remote device page + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine + * are relative to the card point of view + */ +struct scif_hw_dev { + struct scif_hw_ops *hw_ops; + struct scif_hw_dev_id id; + struct mic_mw *mmio; + struct mic_mw *aper; + struct device dev; + u8 dnode; + u8 snode; + void *dp; + void __iomem *rdp; + struct dma_chan **dma_ch; + int num_dma_ch; + bool card_rel_da; +}; + +/** + * scif_driver - operations for a scif I/O driver + * @driver: underlying device driver (populate name and owner). + * @id_table: the ids serviced by this driver. + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct scif_driver { + struct device_driver driver; + const struct scif_hw_dev_id *id_table; + int (*probe)(struct scif_hw_dev *dev); + void (*remove)(struct scif_hw_dev *dev); +}; + +/** + * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus. + * + * @next_db: Obtain the next available doorbell. + * @request_irq: Request an interrupt on a particular doorbell. + * @free_irq: Free an interrupt requested previously. + * @ack_interrupt: acknowledge an interrupt in the ISR. + * @send_intr: Send an interrupt to the remote node on a specified doorbell. + * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell + * which is specifically targeted for a peer to peer node. + * @ioremap: Map a buffer with the specified physical address and length. + * @iounmap: Unmap a buffer previously mapped. + */ +struct scif_hw_ops { + int (*next_db)(struct scif_hw_dev *sdev); + struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev, + irqreturn_t (*func)(int irq, + void *data), + const char *name, void *data, + int db); + void (*free_irq)(struct scif_hw_dev *sdev, + struct mic_irq *cookie, void *data); + void (*ack_interrupt)(struct scif_hw_dev *sdev, int num); + void (*send_intr)(struct scif_hw_dev *sdev, int db); + void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db, + struct mic_mw *mw); + void __iomem * (*ioremap)(struct scif_hw_dev *sdev, + phys_addr_t pa, size_t len); + void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va); +}; + +int scif_register_driver(struct scif_driver *driver); +void scif_unregister_driver(struct scif_driver *driver); +struct scif_hw_dev * +scif_register_device(struct device *pdev, int id, + struct dma_map_ops *dma_ops, + struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, + struct mic_mw *mmio, struct mic_mw *aper, + void *dp, void __iomem *rdp, + struct dma_chan **chan, int num_chan, + bool card_rel_da); +void scif_unregister_device(struct scif_hw_dev *sdev); + +static inline struct scif_hw_dev *dev_to_scif(struct device *dev) +{ + return container_of(dev, struct scif_hw_dev, dev); +} + +static inline struct scif_driver *drv_to_scif(struct device_driver *drv) +{ + return container_of(drv, struct scif_driver, driver); +} +#endif /* _SCIF_BUS_H */ diff --git a/kernel/drivers/misc/mic/card/mic_device.c b/kernel/drivers/misc/mic/card/mic_device.c index 83819eee5..d0edaf7e0 100644 --- a/kernel/drivers/misc/mic/card/mic_device.c +++ b/kernel/drivers/misc/mic/card/mic_device.c @@ -28,6 +28,8 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/dmaengine.h> +#include <linux/kmod.h> #include <linux/mic_common.h> #include "../common/mic_dev.h" @@ -35,71 +37,6 @@ #include "mic_virtio.h" static struct mic_driver *g_drv; -static struct mic_irq *shutdown_cookie; - -static void mic_notify_host(u8 state) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(state, &bootparam->shutdown_status); - dev_dbg(mdrv->dev, "%s %d system_state %d\n", - __func__, __LINE__, state); - mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db)); -} - -static int mic_panic_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_config_db); - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_notify_host(MIC_CRASHED); - return NOTIFY_DONE; -} - -static struct notifier_block mic_panic = { - .notifier_call = mic_panic_event, -}; - -static irqreturn_t mic_shutdown_isr(int irq, void *data) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - mic_ack_interrupt(&g_drv->mdev); - if (ioread8(&bootparam->shutdown_card)) - orderly_poweroff(true); - return IRQ_HANDLED; -} - -static int mic_shutdown_init(void) -{ - int rc = 0; - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - int shutdown_db; - - shutdown_db = mic_next_card_db(); - shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL, - "Shutdown", mdrv, shutdown_db); - if (IS_ERR(shutdown_cookie)) - rc = PTR_ERR(shutdown_cookie); - else - iowrite8(shutdown_db, &bootparam->h2c_shutdown_db); - return rc; -} - -static void mic_shutdown_uninit(void) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_free_card_irq(shutdown_cookie, mdrv); -} static int __init mic_dp_init(void) { @@ -240,6 +177,111 @@ static void mic_uninit_irq(void) kfree(mdrv->irq_info.irq_usage_count); } +static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev) +{ + return dev_get_drvdata(scdev->dev.parent); +} + +static struct mic_irq * +___mic_request_irq(struct scif_hw_dev *scdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, void *data, + int db) +{ + return mic_request_card_irq(func, NULL, name, data, db); +} + +static void +___mic_free_irq(struct scif_hw_dev *scdev, + struct mic_irq *cookie, void *data) +{ + return mic_free_card_irq(cookie, data); +} + +static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_ack_interrupt(&mdrv->mdev); +} + +static int ___mic_next_db(struct scif_hw_dev *scdev) +{ + return mic_next_card_db(); +} + +static void ___mic_send_intr(struct scif_hw_dev *scdev, int db) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_send_intr(&mdrv->mdev, db); +} + +static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db, + struct mic_mw *mw) +{ + mic_send_p2p_intr(db, mw); +} + +static void __iomem * +___mic_ioremap(struct scif_hw_dev *scdev, + phys_addr_t pa, size_t len) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + return mic_card_map(&mdrv->mdev, pa, len); +} + +static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_card_unmap(&mdrv->mdev, va); +} + +static struct scif_hw_ops scif_hw_ops = { + .request_irq = ___mic_request_irq, + .free_irq = ___mic_free_irq, + .ack_interrupt = ___mic_ack_interrupt, + .next_db = ___mic_next_db, + .send_intr = ___mic_send_intr, + .send_p2p_intr = ___mic_send_p2p_intr, + .ioremap = ___mic_ioremap, + .iounmap = ___mic_iounmap, +}; + +static int mic_request_dma_chans(struct mic_driver *mdrv) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + request_module("mic_x100_dma"); + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + do { + chan = dma_request_channel(mask, NULL, NULL); + if (chan) { + mdrv->dma_ch[mdrv->num_dma_ch++] = chan; + if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN) + break; + } + } while (chan); + dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch); + return mdrv->num_dma_ch; +} + +static void mic_free_dma_chans(struct mic_driver *mdrv) +{ + int i = 0; + + for (i = 0; i < mdrv->num_dma_ch; i++) { + dma_release_channel(mdrv->dma_ch[i]); + mdrv->dma_ch[i] = NULL; + } + mdrv->num_dma_ch = 0; +} + /* * mic_driver_init - MIC driver initialization tasks. * @@ -248,13 +290,11 @@ static void mic_uninit_irq(void) int __init mic_driver_init(struct mic_driver *mdrv) { int rc; + struct mic_bootparam __iomem *bootparam; + u8 node_id; g_drv = mdrv; - /* - * Unloading the card module is not supported. The MIC card module - * handles fundamental operations like host/card initiated shutdowns - * and informing the host about card crashes and cannot be unloaded. - */ + /* Unloading the card module is not supported. */ if (!try_module_get(mdrv->dev->driver->owner)) { rc = -ENODEV; goto done; @@ -265,18 +305,31 @@ int __init mic_driver_init(struct mic_driver *mdrv) rc = mic_init_irq(); if (rc) goto dp_uninit; - rc = mic_shutdown_init(); - if (rc) + if (!mic_request_dma_chans(mdrv)) { + rc = -ENODEV; goto irq_uninit; + } rc = mic_devices_init(mdrv); if (rc) - goto shutdown_uninit; + goto dma_free; + bootparam = mdrv->dp; + node_id = ioread8(&bootparam->node_id); + mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV, + NULL, &scif_hw_ops, + 0, node_id, &mdrv->mdev.mmio, NULL, + NULL, mdrv->dp, mdrv->dma_ch, + mdrv->num_dma_ch, true); + if (IS_ERR(mdrv->scdev)) { + rc = PTR_ERR(mdrv->scdev); + goto device_uninit; + } mic_create_card_debug_dir(mdrv); - atomic_notifier_chain_register(&panic_notifier_list, &mic_panic); done: return rc; -shutdown_uninit: - mic_shutdown_uninit(); +device_uninit: + mic_devices_uninit(mdrv); +dma_free: + mic_free_dma_chans(mdrv); irq_uninit: mic_uninit_irq(); dp_uninit: @@ -294,14 +347,9 @@ put: void mic_driver_uninit(struct mic_driver *mdrv) { mic_delete_card_debug_dir(mdrv); + scif_unregister_device(mdrv->scdev); mic_devices_uninit(mdrv); - /* - * Inform the host about the shutdown status i.e. poweroff/restart etc. - * The module cannot be unloaded so the only code path to call - * mic_devices_uninit(..) is the shutdown callback. - */ - mic_notify_host(system_state); - mic_shutdown_uninit(); + mic_free_dma_chans(mdrv); mic_uninit_irq(); mic_dp_uninit(); module_put(mdrv->dev->driver->owner); diff --git a/kernel/drivers/misc/mic/card/mic_device.h b/kernel/drivers/misc/mic/card/mic_device.h index 844be8fc9..1dbf83c41 100644 --- a/kernel/drivers/misc/mic/card/mic_device.h +++ b/kernel/drivers/misc/mic/card/mic_device.h @@ -29,9 +29,9 @@ #include <linux/workqueue.h> #include <linux/io.h> -#include <linux/irqreturn.h> #include <linux/interrupt.h> #include <linux/mic_bus.h> +#include "../bus/scif_bus.h" /** * struct mic_intr_info - Contains h/w specific interrupt sources info @@ -73,6 +73,9 @@ struct mic_device { * @irq_info: The OS specific irq information * @intr_info: H/W specific interrupt information. * @dma_mbdev: dma device on the MIC virtual bus. + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @scdev: SCIF device on the SCIF virtual bus. */ struct mic_driver { char name[20]; @@ -84,6 +87,9 @@ struct mic_driver { struct mic_irq_info irq_info; struct mic_intr_info intr_info; struct mbus_device *dma_mbdev; + struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; + int num_dma_ch; + struct scif_hw_dev *scdev; }; /** @@ -122,10 +128,11 @@ void mic_driver_uninit(struct mic_driver *mdrv); int mic_next_card_db(void); struct mic_irq * mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn, - const char *name, void *data, int intr_src); + const char *name, void *data, int db); void mic_free_card_irq(struct mic_irq *cookie, void *data); u32 mic_read_spad(struct mic_device *mdev, unsigned int idx); void mic_send_intr(struct mic_device *mdev, int doorbell); +void mic_send_p2p_intr(int doorbell, struct mic_mw *mw); int mic_db_to_irq(struct mic_driver *mdrv, int db); u32 mic_ack_interrupt(struct mic_device *mdev); void mic_hw_intr_init(struct mic_driver *mdrv); diff --git a/kernel/drivers/misc/mic/card/mic_x100.c b/kernel/drivers/misc/mic/card/mic_x100.c index e98e537d6..b2958ce23 100644 --- a/kernel/drivers/misc/mic/card/mic_x100.c +++ b/kernel/drivers/misc/mic/card/mic_x100.c @@ -70,6 +70,41 @@ void mic_send_intr(struct mic_device *mdev, int doorbell) (MIC_X100_SBOX_SDBIC0 + (4 * doorbell))); } +/* + * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC. + */ +static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell) +{ + u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; + u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS + + apic_icr_offset); + + /* for MIC we need to make sure we "hit" the send_icr bit (13) */ + apicicr_low = (apicicr_low | (1 << 13)); + /* + * Ensure that the interrupt is ordered w.r.t. previous stores + * to main memory. Fence instructions are not implemented in X100 + * since execution is in order but a compiler barrier is still + * required. + */ + wmb(); + mic_mmio_write(mw, apicicr_low, + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset); +} + +static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell) +{ + int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); + /* + * Ensure that the interrupt is ordered w.r.t. previous stores + * to main memory. Fence instructions are not implemented in X100 + * since execution is in order but a compiler barrier is still + * required. + */ + wmb(); + mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset); +} + /** * mic_ack_interrupt - Device specific interrupt handling. * @mdev: pointer to mic_device instance @@ -91,6 +126,18 @@ static inline int mic_get_rdmasr_irq(int index) return MIC_X100_RDMASR_IRQ_BASE + index; } +void mic_send_p2p_intr(int db, struct mic_mw *mw) +{ + int rdmasr_index; + + if (db < MIC_X100_NUM_SBOX_IRQ) { + mic_x100_send_sbox_intr(mw, db); + } else { + rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ; + mic_x100_send_rdmasr_intr(mw, rdmasr_index); + } +} + /** * mic_hw_intr_init - Initialize h/w specific interrupt * information. @@ -113,11 +160,15 @@ void mic_hw_intr_init(struct mic_driver *mdrv) int mic_db_to_irq(struct mic_driver *mdrv, int db) { int rdmasr_index; + + /* + * The total number of doorbell interrupts on the card are 16. Indices + * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category. + */ if (db < MIC_X100_NUM_SBOX_IRQ) { return mic_get_sbox_irq(db); } else { - rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ + - MIC_X100_RDMASR_IRQ_BASE; + rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ; return mic_get_rdmasr_irq(rdmasr_index); } } @@ -210,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev) mic_hw_intr_init(mdrv); platform_set_drvdata(pdev, mdrv); mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, - NULL, &mbus_hw_ops, + NULL, &mbus_hw_ops, 0, mdrv->mdev.mmio.va); if (IS_ERR(mdrv->dma_mbdev)) { rc = PTR_ERR(mdrv->dma_mbdev); @@ -243,10 +294,16 @@ static void mic_platform_shutdown(struct platform_device *pdev) mic_remove(pdev); } +static u64 mic_dma_mask = DMA_BIT_MASK(64); + static struct platform_device mic_platform_dev = { .name = mic_driver_name, .id = 0, .num_resources = 0, + .dev = { + .dma_mask = &mic_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(64), + }, }; static struct platform_driver __refdata mic_platform_driver = { diff --git a/kernel/drivers/misc/mic/card/mic_x100.h b/kernel/drivers/misc/mic/card/mic_x100.h index d66ea5563..7e2224934 100644 --- a/kernel/drivers/misc/mic/card/mic_x100.h +++ b/kernel/drivers/misc/mic/card/mic_x100.h @@ -35,6 +35,7 @@ #define MIC_X100_SBOX_SDBIC0 0x0000CC90 #define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000 #define MIC_X100_SBOX_RDMASR0 0x0000B180 +#define MIC_X100_SBOX_APICICR0 0x0000A9D0 #define MIC_X100_MAX_DOORBELL_IDX 8 diff --git a/kernel/drivers/misc/mic/common/mic_dev.h b/kernel/drivers/misc/mic/common/mic_dev.h index 92999c2bb..50776772e 100644 --- a/kernel/drivers/misc/mic/common/mic_dev.h +++ b/kernel/drivers/misc/mic/common/mic_dev.h @@ -21,6 +21,19 @@ #ifndef __MIC_DEV_H__ #define __MIC_DEV_H__ +/* The maximum number of MIC devices supported in a single host system. */ +#define MIC_MAX_NUM_DEVS 128 + +/** + * enum mic_hw_family - The hardware family to which a device belongs. + */ +enum mic_hw_family { + MIC_FAMILY_X100 = 0, + MIC_FAMILY_X200, + MIC_FAMILY_UNKNOWN, + MIC_FAMILY_LAST +}; + /** * struct mic_mw - MIC memory window * @@ -48,4 +61,7 @@ struct mic_mw { #define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1 #define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2 +/* Maximum number of DMA channels */ +#define MIC_MAX_DMA_CHAN 4 + #endif diff --git a/kernel/drivers/misc/mic/cosm/Makefile b/kernel/drivers/misc/mic/cosm/Makefile new file mode 100644 index 000000000..b85d4d49d --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/Makefile @@ -0,0 +1,10 @@ +# +# Makefile - Intel MIC Coprocessor State Management (COSM) Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += mic_cosm.o + +mic_cosm-objs := cosm_main.o +mic_cosm-objs += cosm_debugfs.o +mic_cosm-objs += cosm_sysfs.o +mic_cosm-objs += cosm_scif_server.o diff --git a/kernel/drivers/misc/mic/cosm/cosm_debugfs.c b/kernel/drivers/misc/mic/cosm/cosm_debugfs.c new file mode 100644 index 000000000..216cb3cd2 --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/cosm_debugfs.c @@ -0,0 +1,156 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include <linux/debugfs.h> +#include <linux/slab.h> +#include <linux/io.h> +#include "cosm_main.h" + +/* Debugfs parent dir */ +static struct dentry *cosm_dbg; + +/** + * cosm_log_buf_show - Display MIC kernel log buffer + * + * log_buf addr/len is read from System.map by user space + * and populated in sysfs entries. + */ +static int cosm_log_buf_show(struct seq_file *s, void *unused) +{ + void __iomem *log_buf_va; + int __iomem *log_buf_len_va; + struct cosm_device *cdev = s->private; + void *kva; + int size; + u64 aper_offset; + + if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len) + goto done; + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + case MIC_BOOTING: + case MIC_ONLINE: + case MIC_SHUTTING_DOWN: + break; + default: + goto unlock; + } + + /* + * Card kernel will never be relocated and any kernel text/data mapping + * can be translated to phys address by subtracting __START_KERNEL_map. + */ + aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map; + log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map; + log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + + size = ioread32(log_buf_len_va); + kva = kmalloc(size, GFP_KERNEL); + if (!kva) + goto unlock; + + memcpy_fromio(kva, log_buf_va, size); + seq_write(s, kva, size); + kfree(kva); +unlock: + mutex_unlock(&cdev->cosm_mutex); +done: + return 0; +} + +static int cosm_log_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_log_buf_show, inode->i_private); +} + +static const struct file_operations log_buf_ops = { + .owner = THIS_MODULE, + .open = cosm_log_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +/** + * cosm_force_reset_show - Force MIC reset + * + * Invokes the force_reset COSM bus op instead of the standard reset + * op in case a force reset of the MIC device is required + */ +static int cosm_force_reset_show(struct seq_file *s, void *pos) +{ + struct cosm_device *cdev = s->private; + + cosm_stop(cdev, true); + return 0; +} + +static int cosm_force_reset_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_force_reset_show, inode->i_private); +} + +static const struct file_operations force_reset_ops = { + .owner = THIS_MODULE, + .open = cosm_force_reset_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +void cosm_create_debug_dir(struct cosm_device *cdev) +{ + char name[16]; + + if (!cosm_dbg) + return; + + scnprintf(name, sizeof(name), "mic%d", cdev->index); + cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg); + if (!cdev->dbg_dir) + return; + + debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops); + debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev, + &force_reset_ops); +} + +void cosm_delete_debug_dir(struct cosm_device *cdev) +{ + if (!cdev->dbg_dir) + return; + + debugfs_remove_recursive(cdev->dbg_dir); +} + +void cosm_init_debugfs(void) +{ + cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!cosm_dbg) + pr_err("can't create debugfs dir\n"); +} + +void cosm_exit_debugfs(void) +{ + debugfs_remove(cosm_dbg); +} diff --git a/kernel/drivers/misc/mic/cosm/cosm_main.c b/kernel/drivers/misc/mic/cosm/cosm_main.c new file mode 100644 index 000000000..4b4b356c7 --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/cosm_main.c @@ -0,0 +1,388 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/slab.h> +#include <linux/cred.h> +#include "cosm_main.h" + +static const char cosm_driver_name[] = "mic"; + +/* COSM ID allocator */ +static struct ida g_cosm_ida; +/* Class of MIC devices for sysfs accessibility. */ +static struct class *g_cosm_class; +/* Number of MIC devices */ +static atomic_t g_num_dev; + +/** + * cosm_hw_reset - Issue a HW reset for the MIC device + * @cdev: pointer to cosm_device instance + */ +static void cosm_hw_reset(struct cosm_device *cdev, bool force) +{ + int i; + +#define MIC_RESET_TO (45) + if (force && cdev->hw_ops->force_reset) + cdev->hw_ops->force_reset(cdev); + else + cdev->hw_ops->reset(cdev); + + for (i = 0; i < MIC_RESET_TO; i++) { + if (cdev->hw_ops->ready(cdev)) { + cosm_set_state(cdev, MIC_READY); + return; + } + /* + * Resets typically take 10s of seconds to complete. + * Since an MMIO read is required to check if the + * firmware is ready or not, a 1 second delay works nicely. + */ + msleep(1000); + } + cosm_set_state(cdev, MIC_RESET_FAILED); +} + +/** + * cosm_start - Start the MIC + * @cdev: pointer to cosm_device instance + * + * This function prepares an MIC for boot and initiates boot. + * RETURNS: An appropriate -ERRNO error value on error, or 0 for success. + */ +int cosm_start(struct cosm_device *cdev) +{ + const struct cred *orig_cred; + struct cred *override_cred; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (!cdev->bootmode) { + dev_err(&cdev->dev, "%s %d bootmode not set\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } +retry: + if (cdev->state != MIC_READY) { + dev_err(&cdev->dev, "%s %d MIC state not READY\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } + if (!cdev->hw_ops->ready(cdev)) { + cosm_hw_reset(cdev, false); + /* + * The state will either be MIC_READY if the reset succeeded + * or MIC_RESET_FAILED if the firmware reset failed. + */ + goto retry; + } + + /* + * Set credentials to root to allow non-root user to download initramsfs + * with 600 permissions + */ + override_cred = prepare_creds(); + if (!override_cred) { + dev_err(&cdev->dev, "%s %d prepare_creds failed\n", + __func__, __LINE__); + rc = -ENOMEM; + goto unlock_ret; + } + override_cred->fsuid = GLOBAL_ROOT_UID; + orig_cred = override_creds(override_cred); + + rc = cdev->hw_ops->start(cdev, cdev->index); + + revert_creds(orig_cred); + put_cred(override_cred); + if (rc) + goto unlock_ret; + + /* + * If linux is being booted, card is treated 'online' only + * when the scif interface in the card is up. If anything else + * is booted, we set card to 'online' immediately. + */ + if (!strcmp(cdev->bootmode, "linux")) + cosm_set_state(cdev, MIC_BOOTING); + else + cosm_set_state(cdev, MIC_ONLINE); +unlock_ret: + mutex_unlock(&cdev->cosm_mutex); + if (rc) + dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc); + return rc; +} + +/** + * cosm_stop - Prepare the MIC for reset and trigger reset + * @cdev: pointer to cosm_device instance + * @force: force a MIC to reset even if it is already reset and ready. + * + * RETURNS: None + */ +void cosm_stop(struct cosm_device *cdev, bool force) +{ + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY || force) { + /* + * Don't call hw_ops if they have been called previously. + * stop(..) calls device_unregister and will crash the system if + * called multiple times. + */ + bool call_hw_ops = cdev->state != MIC_RESET_FAILED && + cdev->state != MIC_READY; + + if (cdev->state != MIC_RESETTING) + cosm_set_state(cdev, MIC_RESETTING); + cdev->heartbeat_watchdog_enable = false; + if (call_hw_ops) + cdev->hw_ops->stop(cdev, force); + cosm_hw_reset(cdev, force); + cosm_set_shutdown_status(cdev, MIC_NOP); + if (call_hw_ops && cdev->hw_ops->post_reset) + cdev->hw_ops->post_reset(cdev, cdev->state); + } + mutex_unlock(&cdev->cosm_mutex); + flush_work(&cdev->scif_work); +} + +/** + * cosm_reset_trigger_work - Trigger MIC reset + * @work: The work structure + * + * This work is scheduled whenever the host wants to reset the MIC. + */ +static void cosm_reset_trigger_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + reset_trigger_work); + cosm_stop(cdev, false); +} + +/** + * cosm_reset - Schedule MIC reset + * @cdev: pointer to cosm_device instance + * + * RETURNS: An -EINVAL if the card is already READY or 0 for success. + */ +int cosm_reset(struct cosm_device *cdev) +{ + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY) { + cosm_set_state(cdev, MIC_RESETTING); + schedule_work(&cdev->reset_trigger_work); + } else { + dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__); + rc = -EINVAL; + } + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +/** + * cosm_shutdown - Initiate MIC shutdown. + * @cdev: pointer to cosm_device instance + * + * RETURNS: None + */ +int cosm_shutdown(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN }; + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_ONLINE) { + rc = -EINVAL; + dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + goto err; + } + + if (!cdev->epd) { + rc = -ENOTCONN; + dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + cdev->heartbeat_watchdog_enable = false; + cosm_set_state(cdev, MIC_SHUTTING_DOWN); + rc = 0; +err: + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +static int cosm_driver_probe(struct cosm_device *cdev) +{ + int rc; + + /* Initialize SCIF server at first probe */ + if (atomic_add_return(1, &g_num_dev) == 1) { + rc = cosm_scif_init(); + if (rc) + goto scif_exit; + } + mutex_init(&cdev->cosm_mutex); + INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work); + INIT_WORK(&cdev->scif_work, cosm_scif_work); + cdev->sysfs_heartbeat_enable = true; + cosm_sysfs_init(cdev); + cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent, + MKDEV(0, cdev->index), cdev, cdev->attr_group, + "mic%d", cdev->index); + if (IS_ERR(cdev->sdev)) { + rc = PTR_ERR(cdev->sdev); + dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n", + rc); + goto scif_exit; + } + + cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd, + "state"); + if (!cdev->state_sysfs) { + rc = -ENODEV; + dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc); + goto destroy_device; + } + cosm_create_debug_dir(cdev); + return 0; +destroy_device: + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); +scif_exit: + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + return rc; +} + +static void cosm_driver_remove(struct cosm_device *cdev) +{ + cosm_delete_debug_dir(cdev); + sysfs_put(cdev->state_sysfs); + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); + flush_work(&cdev->reset_trigger_work); + cosm_stop(cdev, false); + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + + /* These sysfs entries might have allocated */ + kfree(cdev->cmdline); + kfree(cdev->firmware); + kfree(cdev->ramdisk); + kfree(cdev->bootmode); +} + +static int cosm_suspend(struct device *dev) +{ + struct cosm_device *cdev = dev_to_cosm(dev); + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + /** + * Suspend/freeze hooks in userspace have already shutdown the card. + * Card should be 'ready' in most cases. It is however possible that + * some userspace application initiated a boot. In those cases, we + * simply reset the card. + */ + case MIC_ONLINE: + case MIC_BOOTING: + case MIC_SHUTTING_DOWN: + mutex_unlock(&cdev->cosm_mutex); + cosm_stop(cdev, false); + break; + default: + mutex_unlock(&cdev->cosm_mutex); + break; + } + return 0; +} + +static const struct dev_pm_ops cosm_pm_ops = { + .suspend = cosm_suspend, + .freeze = cosm_suspend +}; + +static struct cosm_driver cosm_driver = { + .driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .pm = &cosm_pm_ops, + }, + .probe = cosm_driver_probe, + .remove = cosm_driver_remove +}; + +static int __init cosm_init(void) +{ + int ret; + + cosm_init_debugfs(); + + g_cosm_class = class_create(THIS_MODULE, cosm_driver_name); + if (IS_ERR(g_cosm_class)) { + ret = PTR_ERR(g_cosm_class); + pr_err("class_create failed ret %d\n", ret); + goto cleanup_debugfs; + } + + ida_init(&g_cosm_ida); + ret = cosm_register_driver(&cosm_driver); + if (ret) { + pr_err("cosm_register_driver failed ret %d\n", ret); + goto ida_destroy; + } + return 0; +ida_destroy: + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); +cleanup_debugfs: + cosm_exit_debugfs(); + return ret; +} + +static void __exit cosm_exit(void) +{ + cosm_unregister_driver(&cosm_driver); + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); + cosm_exit_debugfs(); +} + +module_init(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/cosm/cosm_main.h b/kernel/drivers/misc/mic/cosm/cosm_main.h new file mode 100644 index 000000000..f01156fca --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/cosm_main.h @@ -0,0 +1,70 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#ifndef _COSM_COSM_H_ +#define _COSM_COSM_H_ + +#include <linux/scif.h> +#include "../bus/cosm_bus.h" + +#define COSM_HEARTBEAT_SEND_SEC 30 +#define SCIF_COSM_LISTEN_PORT 201 + +/** + * enum COSM msg id's + * @COSM_MSG_SHUTDOWN: host->card trigger shutdown + * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time + * @COSM_MSG_HEARTBEAT: card->host heartbeat + * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload + */ +enum cosm_msg_id { + COSM_MSG_SHUTDOWN, + COSM_MSG_SYNC_TIME, + COSM_MSG_HEARTBEAT, + COSM_MSG_SHUTDOWN_STATUS, +}; + +struct cosm_msg { + u64 id; + union { + u64 shutdown_status; + struct timespec64 timespec; + }; +}; + +extern const char * const cosm_state_string[]; +extern const char * const cosm_shutdown_status_string[]; + +void cosm_sysfs_init(struct cosm_device *cdev); +int cosm_start(struct cosm_device *cdev); +void cosm_stop(struct cosm_device *cdev, bool force); +int cosm_reset(struct cosm_device *cdev); +int cosm_shutdown(struct cosm_device *cdev); +void cosm_set_state(struct cosm_device *cdev, u8 state); +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status); +void cosm_init_debugfs(void); +void cosm_exit_debugfs(void); +void cosm_create_debug_dir(struct cosm_device *cdev); +void cosm_delete_debug_dir(struct cosm_device *cdev); +int cosm_scif_init(void); +void cosm_scif_exit(void); +void cosm_scif_work(struct work_struct *work); + +#endif diff --git a/kernel/drivers/misc/mic/cosm/cosm_scif_server.c b/kernel/drivers/misc/mic/cosm/cosm_scif_server.c new file mode 100644 index 000000000..5696df432 --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/cosm_scif_server.c @@ -0,0 +1,405 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include <linux/kthread.h> +#include "cosm_main.h" + +/* + * The COSM driver uses SCIF to communicate between the management node and the + * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b) + * receive a shutdown status back from the card upon completion of shutdown and + * (c) receive periodic heartbeat messages from the card used to deduce if the + * card has crashed. + * + * A COSM server consisting of a SCIF listening endpoint waits for incoming + * connections from the card. Upon acceptance of the connection, a separate + * work-item is scheduled to handle SCIF message processing for that card. The + * life-time of this work-item is therefore the time from which the connection + * from a card is accepted to the time at which the connection is closed. A new + * work-item starts each time the card boots and is alive till the card (a) + * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is + * unloaded. + * + * From the point of view of COSM interactions with SCIF during card + * shutdown, reset and crash are as follows: + * + * Card shutdown + * ------------- + * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN + * message from the host. + * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting + * in scif_remove(..) getting called on the card + * 3. scif_remove -> scif_stop -> scif_handle_remove_node -> + * scif_peer_unregister_device -> device_unregister for the host peer device + * 4. During device_unregister remove(..) method of cosm_client is invoked which + * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT + * message being sent to host SCIF. SCIF_DISCNCT message processing on the + * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes + * up the host COSM thread blocked in scif_poll(..) resulting in + * scif_poll(..) returning POLLHUP. + * 5. On the card, scif_peer_release_dev is next called which results in an + * SCIF_EXIT message being sent to the host and after receiving the + * SCIF_EXIT_ACK from the host the peer device teardown on the card is + * complete. + * 6. As part of the SCIF_EXIT message processing on the host, host sends a + * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This + * starts a similar SCIF peer device teardown sequence on the host + * corresponding to the card being shut down. + * + * Card reset + * ---------- + * The case of interest here is when the card has not been previously shut down + * since most of the steps below are skipped in that case: + + * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver + * which unregisters the SCIF HW device resulting in scif_remove(..) being + * called on the host. + * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a + * SCIF_EXIT message being sent to the card. + * 3. The card executes scif_stop() as part of SCIF_EXIT message + * processing. This results in the COSM endpoint on the card being closed and + * the SCIF host peer device on the card getting unregistered similar to + * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the + * host returns POLLHUP as a result. + * 4. On the host, card peer device unregister and SCIF HW remove(..) also + * subsequently complete. + * + * Card crash + * ---------- + * If a reset is issued after the card has crashed, there is no SCIF_DISCNT + * message from the card which would result in scif_poll(..) returning + * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE + * message to itself resulting in the card SCIF peer device being unregistered, + * this results in a scif_peer_release_dev -> scif_cleanup_scifdev-> + * scif_invalidate_ep call sequence which sets the endpoint state to + * DISCONNECTED and results in scif_poll(..) returning POLLHUP. + */ + +#define COSM_SCIF_BACKLOG 16 +#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10 +#define COSM_HEARTBEAT_TIMEOUT_SEC \ + (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC) +#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC) + +static struct task_struct *server_thread; +static scif_epd_t listen_epd; + +/* Publish MIC card's shutdown status to user space MIC daemon */ +static void cosm_update_mic_status(struct cosm_device *cdev) +{ + if (cdev->shutdown_status_int != MIC_NOP) { + cosm_set_shutdown_status(cdev, cdev->shutdown_status_int); + cdev->shutdown_status_int = MIC_NOP; + } +} + +/* Store MIC card's shutdown status internally when it is received */ +static void cosm_shutdown_status_int(struct cosm_device *cdev, + enum mic_status shutdown_status) +{ + switch (shutdown_status) { + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_RESTART: + case MIC_CRASHED: + break; + default: + dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n", + __func__, __LINE__, shutdown_status); + return; + }; + cdev->shutdown_status_int = shutdown_status; + cdev->heartbeat_watchdog_enable = false; + + if (cdev->state != MIC_SHUTTING_DOWN) + cosm_set_state(cdev, MIC_SHUTTING_DOWN); +} + +/* Non-blocking recv. Read and process all available messages */ +static void cosm_scif_recv(struct cosm_device *cdev) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0); + if (!rc) { + break; + } else if (rc < 0) { + dev_dbg(&cdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + break; + } + dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SHUTDOWN_STATUS: + cosm_shutdown_status_int(cdev, msg.shutdown_status); + break; + case COSM_MSG_HEARTBEAT: + /* Nothing to do, heartbeat only unblocks scif_poll */ + break; + default: + dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Publish crashed status for this MIC card */ +static void cosm_set_crashed(struct cosm_device *cdev) +{ + dev_err(&cdev->dev, "node alive timeout\n"); + cosm_shutdown_status_int(cdev, MIC_CRASHED); + cosm_update_mic_status(cdev); +} + +/* Send host time to the MIC card to sync system time between host and MIC */ +static void cosm_send_time(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME }; + int rc; + + getnstimeofday64(&msg.timespec); + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); +} + +/* + * Close this cosm_device's endpoint after its peer endpoint on the card has + * been closed. In all cases except MIC card crash POLLHUP on the host is + * triggered by the client's endpoint being closed. + */ +static void cosm_scif_close(struct cosm_device *cdev) +{ + /* + * Because SHUTDOWN_STATUS message is sent by the MIC cards in the + * reboot notifier when shutdown is still not complete, we notify mpssd + * to reset the card when SCIF endpoint is closed. + */ + cosm_update_mic_status(cdev); + scif_close(cdev->epd); + cdev->epd = NULL; + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); +} + +/* + * Set card state to ONLINE when a new SCIF connection from a MIC card is + * received. Normally the state is BOOTING when the connection comes in, but can + * be ONLINE if cosm_client driver on the card was unloaded and then reloaded. + */ +static int cosm_set_online(struct cosm_device *cdev) +{ + int rc = 0; + + if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) { + cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable; + cdev->epd = cdev->newepd; + if (cdev->state == MIC_BOOTING) + cosm_set_state(cdev, MIC_ONLINE); + cosm_send_time(cdev); + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); + } else { + dev_warn(&cdev->dev, "%s %d not going online in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + rc = -EINVAL; + } + /* Drop reference acquired by bus_find_device in the server thread */ + put_device(&cdev->dev); + return rc; +} + +/* + * Work function for handling work for a SCIF connection from a particular MIC + * card. It first sets the card state to ONLINE and then calls scif_poll to + * block on activity such as incoming messages on the SCIF endpoint. When the + * endpoint is closed, the work function exits, completing its life cycle, from + * MIC card boot to card shutdown/reset/crash. + */ +void cosm_scif_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + scif_work); + struct scif_pollepd pollepd; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (cosm_set_online(cdev)) + goto exit; + + while (1) { + pollepd.epd = cdev->epd; + pollepd.events = POLLIN; + + /* Drop the mutex before blocking in scif_poll(..) */ + mutex_unlock(&cdev->cosm_mutex); + /* poll(..) with timeout on our endpoint */ + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC); + mutex_lock(&cdev->cosm_mutex); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + /* There is a message from the card */ + if (pollepd.revents & POLLIN) + cosm_scif_recv(cdev); + + /* The peer endpoint is closed or this endpoint disconnected */ + if (pollepd.revents & POLLHUP) { + cosm_scif_close(cdev); + break; + } + + /* Did we timeout from poll? */ + if (!rc && cdev->heartbeat_watchdog_enable) + cosm_set_crashed(cdev); + } +exit: + dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__); + mutex_unlock(&cdev->cosm_mutex); +} + +/* + * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC + * cards, finds the correct cosm_device to associate that connection with and + * schedules individual work items for each MIC card. + */ +static int cosm_scif_server(void *unused) +{ + struct cosm_device *cdev; + scif_epd_t newepd; + struct scif_port_id port_id; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + rc = scif_accept(listen_epd, &port_id, &newepd, + SCIF_ACCEPT_SYNC); + if (rc < 0) { + if (-ERESTARTSYS != rc) + pr_err("%s %d rc %d\n", __func__, __LINE__, rc); + continue; + } + + /* + * Associate the incoming connection with a particular + * cosm_device, COSM device ID == SCIF node ID - 1 + */ + cdev = cosm_find_cdev_by_id(port_id.node - 1); + if (!cdev) + continue; + cdev->newepd = newepd; + schedule_work(&cdev->scif_work); + } + + pr_debug("%s %d Server thread stopped\n", __func__, __LINE__); + return 0; +} + +static int cosm_scif_listen(void) +{ + int rc; + + listen_epd = scif_open(); + if (!listen_epd) { + pr_err("%s %d scif_open failed\n", __func__, __LINE__); + return -ENOMEM; + } + + rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT); + if (rc < 0) { + pr_err("%s %d scif_bind failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG); + if (rc < 0) { + pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc); + goto err; + } + pr_debug("%s %d listen_epd set up\n", __func__, __LINE__); + return 0; +err: + scif_close(listen_epd); + listen_epd = NULL; + return rc; +} + +static void cosm_scif_listen_exit(void) +{ + pr_debug("%s %d closing listen_epd\n", __func__, __LINE__); + if (listen_epd) { + scif_close(listen_epd); + listen_epd = NULL; + } +} + +/* + * Create a listening SCIF endpoint and a server kthread which accepts incoming + * SCIF connections from MIC cards + */ +int cosm_scif_init(void) +{ + int rc = cosm_scif_listen(); + + if (rc) { + pr_err("%s %d cosm_scif_listen rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server"); + if (IS_ERR(server_thread)) { + rc = PTR_ERR(server_thread); + pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc); + goto listen_exit; + } + return 0; +listen_exit: + cosm_scif_listen_exit(); +err: + return rc; +} + +/* Stop the running server thread and close the listening SCIF endpoint */ +void cosm_scif_exit(void) +{ + int rc; + + if (!IS_ERR_OR_NULL(server_thread)) { + rc = send_sig(SIGKILL, server_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(server_thread); + } + + cosm_scif_listen_exit(); +} diff --git a/kernel/drivers/misc/mic/cosm/cosm_sysfs.c b/kernel/drivers/misc/mic/cosm/cosm_sysfs.c new file mode 100644 index 000000000..29d6863b6 --- /dev/null +++ b/kernel/drivers/misc/mic/cosm/cosm_sysfs.c @@ -0,0 +1,461 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include <linux/slab.h> +#include "cosm_main.h" + +/* + * A state-to-string lookup table, for exposing a human readable state + * via sysfs. Always keep in sync with enum cosm_states + */ +const char * const cosm_state_string[] = { + [MIC_READY] = "ready", + [MIC_BOOTING] = "booting", + [MIC_ONLINE] = "online", + [MIC_SHUTTING_DOWN] = "shutting_down", + [MIC_RESETTING] = "resetting", + [MIC_RESET_FAILED] = "reset_failed", +}; + +/* + * A shutdown-status-to-string lookup table, for exposing a human + * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status + */ +const char * const cosm_shutdown_status_string[] = { + [MIC_NOP] = "nop", + [MIC_CRASHED] = "crashed", + [MIC_HALTED] = "halted", + [MIC_POWER_OFF] = "poweroff", + [MIC_RESTART] = "restart", +}; + +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status) +{ + dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n", + cosm_shutdown_status_string[cdev->shutdown_status], + cosm_shutdown_status_string[shutdown_status]); + cdev->shutdown_status = shutdown_status; +} + +void cosm_set_state(struct cosm_device *cdev, u8 state) +{ + dev_dbg(&cdev->dev, "State %s -> %s\n", + cosm_state_string[cdev->state], + cosm_state_string[state]); + cdev->state = state; + sysfs_notify_dirent(cdev->state_sysfs); +} + +static ssize_t +family_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->family(cdev, buf); +} +static DEVICE_ATTR_RO(family); + +static ssize_t +stepping_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->stepping(cdev, buf); +} +static DEVICE_ATTR_RO(stepping); + +static ssize_t +state_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->state >= MIC_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_state_string[cdev->state]); +} + +static ssize_t +state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int rc; + + if (!cdev) + return -EINVAL; + + if (sysfs_streq(buf, "boot")) { + rc = cosm_start(cdev); + goto done; + } + if (sysfs_streq(buf, "reset")) { + rc = cosm_reset(cdev); + goto done; + } + + if (sysfs_streq(buf, "shutdown")) { + rc = cosm_shutdown(cdev); + goto done; + } + rc = -EINVAL; +done: + if (rc) + count = rc; + return count; +} +static DEVICE_ATTR_RW(state); + +static ssize_t shutdown_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_shutdown_status_string[cdev->shutdown_status]); +} +static DEVICE_ATTR_RO(shutdown_status); + +static ssize_t +heartbeat_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable); +} + +static ssize_t +heartbeat_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int enable; + int ret; + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + ret = kstrtoint(buf, 10, &enable); + if (ret) + goto unlock; + + cdev->sysfs_heartbeat_enable = enable; + /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */ + if (cdev->state == MIC_ONLINE) + cdev->heartbeat_watchdog_enable = enable; + ret = count; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return ret; +} +static DEVICE_ATTR_RW(heartbeat_enable); + +static ssize_t +cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *cmdline; + + if (!cdev) + return -EINVAL; + + cmdline = cdev->cmdline; + + if (cmdline) + return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); + return 0; +} + +static ssize_t +cmdline_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->cmdline); + + cdev->cmdline = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->cmdline) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->cmdline, buf, count); + + if (cdev->cmdline[count - 1] == '\n') + cdev->cmdline[count - 1] = '\0'; + else + cdev->cmdline[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(cmdline); + +static ssize_t +firmware_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *firmware; + + if (!cdev) + return -EINVAL; + + firmware = cdev->firmware; + + if (firmware) + return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); + return 0; +} + +static ssize_t +firmware_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->firmware); + + cdev->firmware = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->firmware) { + count = -ENOMEM; + goto unlock; + } + strncpy(cdev->firmware, buf, count); + + if (cdev->firmware[count - 1] == '\n') + cdev->firmware[count - 1] = '\0'; + else + cdev->firmware[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(firmware); + +static ssize_t +ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *ramdisk; + + if (!cdev) + return -EINVAL; + + ramdisk = cdev->ramdisk; + + if (ramdisk) + return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); + return 0; +} + +static ssize_t +ramdisk_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->ramdisk); + + cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->ramdisk) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->ramdisk, buf, count); + + if (cdev->ramdisk[count - 1] == '\n') + cdev->ramdisk[count - 1] = '\0'; + else + cdev->ramdisk[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(ramdisk); + +static ssize_t +bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *bootmode; + + if (!cdev) + return -EINVAL; + + bootmode = cdev->bootmode; + + if (bootmode) + return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); + return 0; +} + +static ssize_t +bootmode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash")) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->bootmode); + + cdev->bootmode = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->bootmode) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->bootmode, buf, count); + + if (cdev->bootmode[count - 1] == '\n') + cdev->bootmode[count - 1] = '\0'; + else + cdev->bootmode[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(bootmode); + +static ssize_t +log_buf_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr); +} + +static ssize_t +log_buf_addr_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_addr = (void *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_addr); + +static ssize_t +log_buf_len_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len); +} + +static ssize_t +log_buf_len_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_len = (int *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_len); + +static struct attribute *cosm_default_attrs[] = { + &dev_attr_family.attr, + &dev_attr_stepping.attr, + &dev_attr_state.attr, + &dev_attr_shutdown_status.attr, + &dev_attr_heartbeat_enable.attr, + &dev_attr_cmdline.attr, + &dev_attr_firmware.attr, + &dev_attr_ramdisk.attr, + &dev_attr_bootmode.attr, + &dev_attr_log_buf_addr.attr, + &dev_attr_log_buf_len.attr, + + NULL +}; + +ATTRIBUTE_GROUPS(cosm_default); + +void cosm_sysfs_init(struct cosm_device *cdev) +{ + cdev->attr_group = cosm_default_groups; +} diff --git a/kernel/drivers/misc/mic/cosm_client/Makefile b/kernel/drivers/misc/mic/cosm_client/Makefile new file mode 100644 index 000000000..6f751a519 --- /dev/null +++ b/kernel/drivers/misc/mic/cosm_client/Makefile @@ -0,0 +1,7 @@ +# +# Makefile - Intel MIC COSM Client Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += cosm_client.o + +cosm_client-objs += cosm_scif_client.o diff --git a/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c b/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c new file mode 100644 index 000000000..03e98bf1a --- /dev/null +++ b/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -0,0 +1,275 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Client Driver + * + */ +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/kthread.h> +#include "../cosm/cosm_main.h" + +#define COSM_SCIF_MAX_RETRIES 10 +#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC) + +static struct task_struct *client_thread; +static scif_epd_t client_epd; +static struct scif_peer_dev *client_spdev; + +/* + * Reboot notifier: receives shutdown status from the OS and communicates it + * back to the COSM process on the host + */ +static int cosm_reboot_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS }; + int rc; + + event = (event == SYS_RESTART) ? SYSTEM_RESTART : event; + dev_info(&client_spdev->dev, "%s %d received event %ld\n", + __func__, __LINE__, event); + + msg.shutdown_status = event; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + + return NOTIFY_DONE; +} + +static struct notifier_block cosm_reboot = { + .notifier_call = cosm_reboot_event, +}; + +/* Set system time from timespec value received from the host */ +static void cosm_set_time(struct cosm_msg *msg) +{ + int rc = do_settimeofday64(&msg->timespec); + + if (rc) + dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n", + __func__, __LINE__, rc); +} + +/* COSM client receive message processing */ +static void cosm_client_recv(void) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(client_epd, &msg, sizeof(msg), 0); + if (!rc) { + return; + } else if (rc < 0) { + dev_err(&client_spdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + return; + } + + dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SYNC_TIME: + cosm_set_time(&msg); + break; + case COSM_MSG_SHUTDOWN: + orderly_poweroff(true); + break; + default: + dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Initiate connection to the COSM server on the host */ +static int cosm_scif_connect(void) +{ + struct scif_port_id port_id; + int i, rc; + + client_epd = scif_open(); + if (!client_epd) { + dev_err(&client_spdev->dev, "%s %d scif_open failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + port_id.node = 0; + port_id.port = SCIF_COSM_LISTEN_PORT; + + for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) { + rc = scif_connect(client_epd, &port_id); + if (rc < 0) + msleep(1000); + else + break; + } + + if (rc < 0) { + dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n", + __func__, __LINE__, rc); + scif_close(client_epd); + client_epd = NULL; + } + return rc < 0 ? rc : 0; +} + +/* Close host SCIF connection */ +static void cosm_scif_connect_exit(void) +{ + if (client_epd) { + scif_close(client_epd); + client_epd = NULL; + } +} + +/* + * COSM SCIF client thread function: waits for messages from the host and sends + * a heartbeat to the host + */ +static int cosm_scif_client(void *unused) +{ + struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT }; + struct scif_pollepd pollepd; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + pollepd.epd = client_epd; + pollepd.events = POLLIN; + + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC); + if (rc < 0) { + if (-EINTR != rc) + dev_err(&client_spdev->dev, + "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + if (pollepd.revents & POLLIN) + cosm_client_recv(); + + msg.id = COSM_MSG_HEARTBEAT; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + } + + dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n", + __func__, __LINE__); + return 0; +} + +static void cosm_scif_probe(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + /* We are only interested in the host with spdev->dnode == 0 */ + if (spdev->dnode) + return; + + client_spdev = spdev; + rc = cosm_scif_connect(); + if (rc) + goto exit; + + rc = register_reboot_notifier(&cosm_reboot); + if (rc) { + dev_err(&spdev->dev, + "reboot notifier registration failed rc %d\n", rc); + goto connect_exit; + } + + client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client"); + if (IS_ERR(client_thread)) { + rc = PTR_ERR(client_thread); + dev_err(&spdev->dev, "%s %d kthread_run rc %d\n", + __func__, __LINE__, rc); + goto unreg_reboot; + } + return; +unreg_reboot: + unregister_reboot_notifier(&cosm_reboot); +connect_exit: + cosm_scif_connect_exit(); +exit: + client_spdev = NULL; +} + +static void cosm_scif_remove(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + if (spdev->dnode) + return; + + if (!IS_ERR_OR_NULL(client_thread)) { + rc = send_sig(SIGKILL, client_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(client_thread); + } + unregister_reboot_notifier(&cosm_reboot); + cosm_scif_connect_exit(); + client_spdev = NULL; +} + +static struct scif_client scif_client_cosm = { + .name = KBUILD_MODNAME, + .probe = cosm_scif_probe, + .remove = cosm_scif_remove, +}; + +static int __init cosm_client_init(void) +{ + int rc = scif_client_register(&scif_client_cosm); + + if (rc) + pr_err("scif_client_register failed rc %d\n", rc); + return rc; +} + +static void __exit cosm_client_exit(void) +{ + scif_client_unregister(&scif_client_cosm); +} + +module_init(cosm_client_init); +module_exit(cosm_client_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/host/Makefile b/kernel/drivers/misc/mic/host/Makefile index c2197f999..004d3db0f 100644 --- a/kernel/drivers/misc/mic/host/Makefile +++ b/kernel/drivers/misc/mic/host/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o mic_host-objs := mic_main.o mic_host-objs += mic_x100.o -mic_host-objs += mic_sysfs.o mic_host-objs += mic_smpt.o mic_host-objs += mic_intr.o mic_host-objs += mic_boot.o diff --git a/kernel/drivers/misc/mic/host/mic_boot.c b/kernel/drivers/misc/mic/host/mic_boot.c index d9fa609da..7845564df 100644 --- a/kernel/drivers/misc/mic/host/mic_boot.c +++ b/kernel/drivers/misc/mic/host/mic_boot.c @@ -21,14 +21,197 @@ #include <linux/delay.h> #include <linux/firmware.h> #include <linux/pci.h> - +#include <linux/kmod.h> #include <linux/mic_common.h> #include <linux/mic_bus.h> +#include "../bus/scif_bus.h" #include "../common/mic_dev.h" #include "mic_device.h" #include "mic_smpt.h" #include "mic_virtio.h" +static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev) +{ + return dev_get_drvdata(scdev->dev.parent); +} + +static void *__mic_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + dma_addr_t tmp; + void *va = kmalloc(size, gfp); + + if (va) { + tmp = mic_map_single(mdev, va, size); + if (dma_mapping_error(dev, tmp)) { + kfree(va); + va = NULL; + } else { + *dma_handle = tmp; + } + } + return va; +} + +static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, struct dma_attrs *attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + mic_unmap_single(mdev, dma_handle, size); + kfree(vaddr); +} + +static dma_addr_t +__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + void *va = phys_to_virt(page_to_phys(page)) + offset; + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_map_single(mdev, va, size); +} + +static void +__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + mic_unmap_single(mdev, dma_addr, size); +} + +static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + struct scatterlist *s; + int i, j, ret; + dma_addr_t da; + + ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir); + if (ret <= 0) + return 0; + + for_each_sg(sg, s, nents, i) { + da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length); + if (!da) + goto err; + sg_dma_address(s) = da; + } + return nents; +err: + for_each_sg(sg, s, i, j) { + mic_unmap(mdev, sg_dma_address(s), s->length); + sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s)); + } + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); + return 0; +} + +static void __mic_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + struct scatterlist *s; + dma_addr_t da; + int i; + + for_each_sg(sg, s, nents, i) { + da = mic_to_dma_addr(mdev, sg_dma_address(s)); + mic_unmap(mdev, sg_dma_address(s), s->length); + sg_dma_address(s) = da; + } + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); +} + +static struct dma_map_ops __mic_dma_ops = { + .alloc = __mic_dma_alloc, + .free = __mic_dma_free, + .map_page = __mic_dma_map_page, + .unmap_page = __mic_dma_unmap_page, + .map_sg = __mic_dma_map_sg, + .unmap_sg = __mic_dma_unmap_sg, +}; + +static struct mic_irq * +___mic_request_irq(struct scif_hw_dev *scdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, + void *data, int db) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_request_threaded_irq(mdev, func, NULL, name, data, + db, MIC_INTR_DB); +} + +static void +___mic_free_irq(struct scif_hw_dev *scdev, + struct mic_irq *cookie, void *data) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_free_irq(mdev, cookie, data); +} + +static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + mdev->ops->intr_workarounds(mdev); +} + +static int ___mic_next_db(struct scif_hw_dev *scdev) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_next_db(mdev); +} + +static void ___mic_send_intr(struct scif_hw_dev *scdev, int db) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + mdev->ops->send_intr(mdev, db); +} + +static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev, + phys_addr_t pa, size_t len) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mdev->aper.va + pa; +} + +static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va) +{ + /* nothing to do */ +} + +static struct scif_hw_ops scif_hw_ops = { + .request_irq = ___mic_request_irq, + .free_irq = ___mic_free_irq, + .ack_interrupt = ___mic_ack_interrupt, + .next_db = ___mic_next_db, + .send_intr = ___mic_send_intr, + .ioremap = ___mic_ioremap, + .iounmap = ___mic_iounmap, +}; + static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev) { return dev_get_drvdata(mbdev->dev.parent); @@ -87,295 +270,213 @@ static struct mbus_hw_ops mbus_hw_ops = { .ack_interrupt = _mic_ack_interrupt, }; -/** - * mic_reset - Reset the MIC device. - * @mdev: pointer to mic_device instance - */ -static void mic_reset(struct mic_device *mdev) +/* Initialize the MIC bootparams */ +void mic_bootparam_init(struct mic_device *mdev) { - int i; + struct mic_bootparam *bootparam = mdev->dp; + + bootparam->magic = cpu_to_le32(MIC_MAGIC); + bootparam->h2c_config_db = -1; + bootparam->node_id = mdev->id + 1; + bootparam->scif_host_dma_addr = 0x0; + bootparam->scif_card_dma_addr = 0x0; + bootparam->c2h_scif_db = -1; + bootparam->h2c_scif_db = -1; +} + +static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev) +{ + return dev_get_drvdata(cdev->dev.parent); +} -#define MIC_RESET_TO (45) +static void _mic_reset(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); - reinit_completion(&mdev->reset_wait); mdev->ops->reset_fw_ready(mdev); mdev->ops->reset(mdev); - - for (i = 0; i < MIC_RESET_TO; i++) { - if (mdev->ops->is_fw_ready(mdev)) - goto done; - /* - * Resets typically take 10s of seconds to complete. - * Since an MMIO read is required to check if the - * firmware is ready or not, a 1 second delay works nicely. - */ - msleep(1000); - } - mic_set_state(mdev, MIC_RESET_FAILED); -done: - complete_all(&mdev->reset_wait); } -/* Initialize the MIC bootparams */ -void mic_bootparam_init(struct mic_device *mdev) +static bool _mic_ready(struct cosm_device *cdev) { - struct mic_bootparam *bootparam = mdev->dp; + struct mic_device *mdev = cosmdev_to_mdev(cdev); - bootparam->magic = cpu_to_le32(MIC_MAGIC); - bootparam->c2h_shutdown_db = mdev->shutdown_db; - bootparam->h2c_shutdown_db = -1; - bootparam->h2c_config_db = -1; - bootparam->shutdown_status = 0; - bootparam->shutdown_card = 0; + return mdev->ops->is_fw_ready(mdev); +} + +/** + * mic_request_dma_chans - Request DMA channels + * @mdev: pointer to mic_device instance + * + * returns number of DMA channels acquired + */ +static int mic_request_dma_chans(struct mic_device *mdev) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + request_module("mic_x100_dma"); + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + do { + chan = dma_request_channel(mask, mdev->ops->dma_filter, + &mdev->pdev->dev); + if (chan) { + mdev->dma_ch[mdev->num_dma_ch++] = chan; + if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN) + break; + } + } while (chan); + dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch); + return mdev->num_dma_ch; } /** - * mic_start - Start the MIC. + * mic_free_dma_chans - release DMA channels * @mdev: pointer to mic_device instance - * @buf: buffer containing boot string including firmware/ramdisk path. + * + * returns none + */ +static void mic_free_dma_chans(struct mic_device *mdev) +{ + int i = 0; + + for (i = 0; i < mdev->num_dma_ch; i++) { + dma_release_channel(mdev->dma_ch[i]); + mdev->dma_ch[i] = NULL; + } + mdev->num_dma_ch = 0; +} + +/** + * _mic_start - Start the MIC. + * @cdev: pointer to cosm_device instance + * @id: MIC device id/index provided by COSM used in other drivers like SCIF * * This function prepares an MIC for boot and initiates boot. * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + * + * For all cosm_hw_ops the caller holds a mutex to ensure serialization. */ -int mic_start(struct mic_device *mdev, const char *buf) +static int _mic_start(struct cosm_device *cdev, int id) { + struct mic_device *mdev = cosmdev_to_mdev(cdev); int rc; - mutex_lock(&mdev->mic_mutex); -retry: - if (MIC_OFFLINE != mdev->state) { - rc = -EINVAL; - goto unlock_ret; - } - if (!mdev->ops->is_fw_ready(mdev)) { - mic_reset(mdev); - /* - * The state will either be MIC_OFFLINE if the reset succeeded - * or MIC_RESET_FAILED if the firmware reset failed. - */ - goto retry; - } - mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent, + + mic_bootparam_init(mdev); + mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev, MBUS_DEV_DMA_HOST, &mic_dma_ops, - &mbus_hw_ops, mdev->mmio.va); + &mbus_hw_ops, id, mdev->mmio.va); if (IS_ERR(mdev->dma_mbdev)) { rc = PTR_ERR(mdev->dma_mbdev); goto unlock_ret; } - mdev->dma_ch = mic_request_dma_chan(mdev); - if (!mdev->dma_ch) { - rc = -ENXIO; + if (!mic_request_dma_chans(mdev)) { + rc = -ENODEV; goto dma_remove; } - rc = mdev->ops->load_mic_fw(mdev, buf); + mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV, + &__mic_dma_ops, &scif_hw_ops, + id + 1, 0, &mdev->mmio, + &mdev->aper, mdev->dp, NULL, + mdev->dma_ch, mdev->num_dma_ch, + true); + if (IS_ERR(mdev->scdev)) { + rc = PTR_ERR(mdev->scdev); + goto dma_free; + } + + rc = mdev->ops->load_mic_fw(mdev, NULL); if (rc) - goto dma_release; + goto scif_remove; mic_smpt_restore(mdev); mic_intr_restore(mdev); mdev->intr_ops->enable_interrupts(mdev); mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); mdev->ops->send_firmware_intr(mdev); - mic_set_state(mdev, MIC_ONLINE); goto unlock_ret; -dma_release: - dma_release_channel(mdev->dma_ch); +scif_remove: + scif_unregister_device(mdev->scdev); +dma_free: + mic_free_dma_chans(mdev); dma_remove: mbus_unregister_device(mdev->dma_mbdev); unlock_ret: - mutex_unlock(&mdev->mic_mutex); return rc; } /** - * mic_stop - Prepare the MIC for reset and trigger reset. - * @mdev: pointer to mic_device instance + * _mic_stop - Prepare the MIC for reset and trigger reset. + * @cdev: pointer to cosm_device instance * @force: force a MIC to reset even if it is already offline. * * RETURNS: None. */ -void mic_stop(struct mic_device *mdev, bool force) +static void _mic_stop(struct cosm_device *cdev, bool force) { - mutex_lock(&mdev->mic_mutex); - if (MIC_OFFLINE != mdev->state || force) { - mic_virtio_reset_devices(mdev); - if (mdev->dma_ch) { - dma_release_channel(mdev->dma_ch); - mdev->dma_ch = NULL; - } - mbus_unregister_device(mdev->dma_mbdev); - mic_bootparam_init(mdev); - mic_reset(mdev); - if (MIC_RESET_FAILED == mdev->state) - goto unlock; - mic_set_shutdown_status(mdev, MIC_NOP); - if (MIC_SUSPENDED != mdev->state) - mic_set_state(mdev, MIC_OFFLINE); - } -unlock: - mutex_unlock(&mdev->mic_mutex); -} - -/** - * mic_shutdown - Initiate MIC shutdown. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_shutdown(struct mic_device *mdev) -{ - struct mic_bootparam *bootparam = mdev->dp; - s8 db = bootparam->h2c_shutdown_db; - - mutex_lock(&mdev->mic_mutex); - if (MIC_ONLINE == mdev->state && db != -1) { - bootparam->shutdown_card = 1; - mdev->ops->send_intr(mdev, db); - mic_set_state(mdev, MIC_SHUTTING_DOWN); - } - mutex_unlock(&mdev->mic_mutex); -} - -/** - * mic_shutdown_work - Handle shutdown interrupt from MIC. - * @work: The work structure. - * - * This work is scheduled whenever the host has received a shutdown - * interrupt from the MIC. - */ -void mic_shutdown_work(struct work_struct *work) -{ - struct mic_device *mdev = container_of(work, struct mic_device, - shutdown_work); - struct mic_bootparam *bootparam = mdev->dp; - - mutex_lock(&mdev->mic_mutex); - mic_set_shutdown_status(mdev, bootparam->shutdown_status); - bootparam->shutdown_status = 0; + struct mic_device *mdev = cosmdev_to_mdev(cdev); /* - * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not - * change the state here so as to prevent users from booting the card - * during and after the suspend operation. + * Since SCIF handles card shutdown and reset (using COSM), it will + * will be the first to be registered and the last to be + * unregistered. */ - if (MIC_SHUTTING_DOWN != mdev->state && - MIC_SUSPENDED != mdev->state) - mic_set_state(mdev, MIC_SHUTTING_DOWN); - mutex_unlock(&mdev->mic_mutex); + mic_virtio_reset_devices(mdev); + scif_unregister_device(mdev->scdev); + mic_free_dma_chans(mdev); + mbus_unregister_device(mdev->dma_mbdev); + mic_bootparam_init(mdev); } -/** - * mic_reset_trigger_work - Trigger MIC reset. - * @work: The work structure. - * - * This work is scheduled whenever the host wants to reset the MIC. - */ -void mic_reset_trigger_work(struct work_struct *work) +static ssize_t _mic_family(struct cosm_device *cdev, char *buf) { - struct mic_device *mdev = container_of(work, struct mic_device, - reset_trigger_work); + struct mic_device *mdev = cosmdev_to_mdev(cdev); + static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" }; - mic_stop(mdev, false); + return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]); } -/** - * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate - * event. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_complete_resume(struct mic_device *mdev) +static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf) { - if (mdev->state != MIC_SUSPENDED) { - dev_warn(mdev->sdev->parent, "state %d should be %d\n", - mdev->state, MIC_SUSPENDED); - return; - } - - /* Make sure firmware is ready */ - if (!mdev->ops->is_fw_ready(mdev)) - mic_stop(mdev, true); - - mutex_lock(&mdev->mic_mutex); - mic_set_state(mdev, MIC_OFFLINE); - mutex_unlock(&mdev->mic_mutex); -} + struct mic_device *mdev = cosmdev_to_mdev(cdev); + const char *string = "??"; -/** - * mic_prepare_suspend - Handle suspend notification for the MIC device. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_prepare_suspend(struct mic_device *mdev) -{ - unsigned long timeout; - -#define MIC_SUSPEND_TIMEOUT (60 * HZ) - - mutex_lock(&mdev->mic_mutex); - switch (mdev->state) { - case MIC_OFFLINE: - /* - * Card is already offline. Set state to MIC_SUSPENDED - * to prevent users from booting the card. - */ - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); + switch (mdev->stepping) { + case MIC_A0_STEP: + string = "A0"; break; - case MIC_ONLINE: - /* - * Card is online. Set state to MIC_SUSPENDING and notify - * MIC user space daemon which will issue card - * shutdown and reset. - */ - mic_set_state(mdev, MIC_SUSPENDING); - mutex_unlock(&mdev->mic_mutex); - timeout = wait_for_completion_timeout(&mdev->reset_wait, - MIC_SUSPEND_TIMEOUT); - /* Force reset the card if the shutdown completion timed out */ - if (!timeout) { - mutex_lock(&mdev->mic_mutex); - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); - mic_stop(mdev, true); - } + case MIC_B0_STEP: + string = "B0"; break; - case MIC_SHUTTING_DOWN: - /* - * Card is shutting down. Set state to MIC_SUSPENDED - * to prevent further boot of the card. - */ - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); - timeout = wait_for_completion_timeout(&mdev->reset_wait, - MIC_SUSPEND_TIMEOUT); - /* Force reset the card if the shutdown completion timed out */ - if (!timeout) - mic_stop(mdev, true); + case MIC_B1_STEP: + string = "B1"; + break; + case MIC_C0_STEP: + string = "C0"; break; default: - mutex_unlock(&mdev->mic_mutex); break; } + return scnprintf(buf, PAGE_SIZE, "%s\n", string); } -/** - * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_suspend(struct mic_device *mdev) +static struct mic_mw *_mic_aper(struct cosm_device *cdev) { - struct mic_bootparam *bootparam = mdev->dp; - s8 db = bootparam->h2c_shutdown_db; + struct mic_device *mdev = cosmdev_to_mdev(cdev); - mutex_lock(&mdev->mic_mutex); - if (MIC_SUSPENDING == mdev->state && db != -1) { - bootparam->shutdown_card = 1; - mdev->ops->send_intr(mdev, db); - mic_set_state(mdev, MIC_SUSPENDED); - } - mutex_unlock(&mdev->mic_mutex); + return &mdev->aper; } + +struct cosm_hw_ops cosm_hw_ops = { + .reset = _mic_reset, + .force_reset = _mic_reset, + .post_reset = NULL, + .ready = _mic_ready, + .start = _mic_start, + .stop = _mic_stop, + .family = _mic_family, + .stepping = _mic_stepping, + .aper = _mic_aper, +}; diff --git a/kernel/drivers/misc/mic/host/mic_debugfs.c b/kernel/drivers/misc/mic/host/mic_debugfs.c index 687e9aacf..105816007 100644 --- a/kernel/drivers/misc/mic/host/mic_debugfs.c +++ b/kernel/drivers/misc/mic/host/mic_debugfs.c @@ -31,71 +31,6 @@ /* Debugfs parent dir */ static struct dentry *mic_dbg; -/** - * mic_log_buf_show - Display MIC kernel log buffer. - * - * log_buf addr/len is read from System.map by user space - * and populated in sysfs entries. - */ -static int mic_log_buf_show(struct seq_file *s, void *unused) -{ - void __iomem *log_buf_va; - int __iomem *log_buf_len_va; - struct mic_device *mdev = s->private; - void *kva; - int size; - unsigned long aper_offset; - - if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len) - goto done; - /* - * Card kernel will never be relocated and any kernel text/data mapping - * can be translated to phys address by subtracting __START_KERNEL_map. - */ - aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map; - log_buf_len_va = mdev->aper.va + aper_offset; - aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map; - log_buf_va = mdev->aper.va + aper_offset; - size = ioread32(log_buf_len_va); - - kva = kmalloc(size, GFP_KERNEL); - if (!kva) - goto done; - mutex_lock(&mdev->mic_mutex); - memcpy_fromio(kva, log_buf_va, size); - switch (mdev->state) { - case MIC_ONLINE: - /* Fall through */ - case MIC_SHUTTING_DOWN: - seq_write(s, kva, size); - break; - default: - break; - } - mutex_unlock(&mdev->mic_mutex); - kfree(kva); -done: - return 0; -} - -static int mic_log_buf_open(struct inode *inode, struct file *file) -{ - return single_open(file, mic_log_buf_show, inode->i_private); -} - -static int mic_log_buf_release(struct inode *inode, struct file *file) -{ - return single_release(inode, file); -} - -static const struct file_operations log_buf_ops = { - .owner = THIS_MODULE, - .open = mic_log_buf_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mic_log_buf_release -}; - static int mic_smpt_show(struct seq_file *s, void *pos) { int i; @@ -138,32 +73,6 @@ static const struct file_operations smpt_file_ops = { .release = mic_smpt_debug_release }; -static int mic_soft_reset_show(struct seq_file *s, void *pos) -{ - struct mic_device *mdev = s->private; - - mic_stop(mdev, true); - return 0; -} - -static int mic_soft_reset_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, mic_soft_reset_show, inode->i_private); -} - -static int mic_soft_reset_debug_release(struct inode *inode, struct file *file) -{ - return single_release(inode, file); -} - -static const struct file_operations soft_reset_ops = { - .owner = THIS_MODULE, - .open = mic_soft_reset_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mic_soft_reset_debug_release -}; - static int mic_post_code_show(struct seq_file *s, void *pos) { struct mic_device *mdev = s->private; @@ -204,16 +113,19 @@ static int mic_dp_show(struct seq_file *s, void *pos) seq_printf(s, "Bootparam: magic 0x%x\n", bootparam->magic); - seq_printf(s, "Bootparam: h2c_shutdown_db %d\n", - bootparam->h2c_shutdown_db); seq_printf(s, "Bootparam: h2c_config_db %d\n", bootparam->h2c_config_db); - seq_printf(s, "Bootparam: c2h_shutdown_db %d\n", - bootparam->c2h_shutdown_db); - seq_printf(s, "Bootparam: shutdown_status %d\n", - bootparam->shutdown_status); - seq_printf(s, "Bootparam: shutdown_card %d\n", - bootparam->shutdown_card); + seq_printf(s, "Bootparam: node_id %d\n", + bootparam->node_id); + seq_printf(s, "Bootparam: c2h_scif_db %d\n", + bootparam->c2h_scif_db); + seq_printf(s, "Bootparam: h2c_scif_db %d\n", + bootparam->h2c_scif_db); + seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n", + bootparam->scif_host_dma_addr); + seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n", + bootparam->scif_card_dma_addr); + for (i = sizeof(*bootparam); i < MIC_DP_SIZE; i += mic_total_desc_size(d)) { @@ -379,8 +291,7 @@ static int mic_msi_irq_info_show(struct seq_file *s, void *pos) int i, j; u16 entry; u16 vector; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; if (pci_dev_msi_enabled(pdev)) { for (i = 0; i < mdev->irq_info.num_vectors; i++) { @@ -441,20 +352,18 @@ static const struct file_operations msi_irq_info_ops = { */ void mic_create_debug_dir(struct mic_device *mdev) { + char name[16]; + if (!mic_dbg) return; - mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg); + scnprintf(name, sizeof(name), "mic%d", mdev->id); + mdev->dbg_dir = debugfs_create_dir(name, mic_dbg); if (!mdev->dbg_dir) return; - debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops); - debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops); - debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev, - &soft_reset_ops); - debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev, &post_code_ops); diff --git a/kernel/drivers/misc/mic/host/mic_device.h b/kernel/drivers/misc/mic/host/mic_device.h index 016bd15a7..461184a12 100644 --- a/kernel/drivers/misc/mic/host/mic_device.h +++ b/kernel/drivers/misc/mic/host/mic_device.h @@ -26,21 +26,12 @@ #include <linux/notifier.h> #include <linux/irqreturn.h> #include <linux/dmaengine.h> +#include <linux/miscdevice.h> #include <linux/mic_bus.h> - +#include "../bus/scif_bus.h" +#include "../bus/cosm_bus.h" #include "mic_intr.h" -/* The maximum number of MIC devices supported in a single host system. */ -#define MIC_MAX_NUM_DEVS 256 - -/** - * enum mic_hw_family - The hardware family to which a device belongs. - */ -enum mic_hw_family { - MIC_FAMILY_X100 = 0, - MIC_FAMILY_UNKNOWN -}; - /** * enum mic_stepping - MIC stepping ids. */ @@ -51,6 +42,8 @@ enum mic_stepping { MIC_C0_STEP = 0x20, }; +extern struct cosm_hw_ops cosm_hw_ops; + /** * struct mic_device - MIC device information for each card. * @@ -60,8 +53,7 @@ enum mic_stepping { * @ops: MIC HW specific operations. * @id: The unique device id for this MIC device. * @stepping: Stepping ID. - * @attr_group: Pointer to list of sysfs attribute groups. - * @sdev: Device for sysfs entries. + * @pdev: Underlying PCI device. * @mic_mutex: Mutex for synchronizing access to mic_device. * @intr_ops: HW specific interrupt operations. * @smpt_ops: Hardware specific SMPT operations. @@ -69,28 +61,17 @@ enum mic_stepping { * @intr_info: H/W specific interrupt information. * @irq_info: The OS specific irq information * @dbg_dir: debugfs directory of this MIC device. - * @cmdline: Kernel command line. - * @firmware: Firmware file name. - * @ramdisk: Ramdisk file name. - * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. * @bootaddr: MIC boot address. - * @reset_trigger_work: Work for triggering reset requests. - * @shutdown_work: Work for handling shutdown interrupts. - * @state: MIC state. - * @shutdown_status: MIC status reported by card for shutdown/crashes. - * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. - * @reset_wait: Waitqueue for sleeping while reset completes. - * @log_buf_addr: Log buffer address for MIC. - * @log_buf_len: Log buffer length address for MIC. * @dp: virtio device page * @dp_dma_addr: virtio device page DMA address. - * @shutdown_db: shutdown doorbell. - * @shutdown_cookie: shutdown cookie. - * @cdev: Character device for MIC. + * @name: name for the misc char device + * @miscdev: registered misc char device * @vdev_list: list of virtio devices. - * @pm_notifier: Handles PM notifications from the OS. * @dma_mbdev: MIC BUS DMA device. - * @dma_ch: DMA channel reserved by this driver for use by virtio devices. + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @scdev: SCIF device on the SCIF virtual bus. + * @cosm_dev: COSM device */ struct mic_device { struct mic_mw mmio; @@ -99,8 +80,7 @@ struct mic_device { struct mic_hw_ops *ops; int id; enum mic_stepping stepping; - const struct attribute_group **attr_group; - struct device *sdev; + struct pci_dev *pdev; struct mutex mic_mutex; struct mic_hw_intr_ops *intr_ops; struct mic_smpt_ops *smpt_ops; @@ -108,28 +88,17 @@ struct mic_device { struct mic_intr_info *intr_info; struct mic_irq_info irq_info; struct dentry *dbg_dir; - char *cmdline; - char *firmware; - char *ramdisk; - char *bootmode; u32 bootaddr; - struct work_struct reset_trigger_work; - struct work_struct shutdown_work; - u8 state; - u8 shutdown_status; - struct kernfs_node *state_sysfs; - struct completion reset_wait; - void *log_buf_addr; - int *log_buf_len; void *dp; dma_addr_t dp_dma_addr; - int shutdown_db; - struct mic_irq *shutdown_cookie; - struct cdev cdev; + char name[16]; + struct miscdevice miscdev; struct list_head vdev_list; - struct notifier_block pm_notifier; struct mbus_device *dma_mbdev; - struct dma_chan *dma_ch; + struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; + int num_dma_ch; + struct scif_hw_dev *scdev; + struct cosm_device *cosm_dev; }; /** @@ -195,37 +164,9 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) iowrite32(val, mw->va + offset); } -static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev) -{ - dma_cap_mask_t mask; - struct dma_chan *chan; - - dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); - chan = dma_request_channel(mask, mdev->ops->dma_filter, - mdev->sdev->parent); - if (chan) - return chan; - dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n", - __func__, __LINE__); - return NULL; -} - -void mic_sysfs_init(struct mic_device *mdev); -int mic_start(struct mic_device *mdev, const char *buf); -void mic_stop(struct mic_device *mdev, bool force); -void mic_shutdown(struct mic_device *mdev); -void mic_reset_delayed_work(struct work_struct *work); -void mic_reset_trigger_work(struct work_struct *work); -void mic_shutdown_work(struct work_struct *work); void mic_bootparam_init(struct mic_device *mdev); -void mic_set_state(struct mic_device *mdev, u8 state); -void mic_set_shutdown_status(struct mic_device *mdev, u8 status); void mic_create_debug_dir(struct mic_device *dev); void mic_delete_debug_dir(struct mic_device *dev); void __init mic_init_debugfs(void); void mic_exit_debugfs(void); -void mic_prepare_suspend(struct mic_device *mdev); -void mic_complete_resume(struct mic_device *mdev); -void mic_suspend(struct mic_device *mdev); #endif diff --git a/kernel/drivers/misc/mic/host/mic_fops.c b/kernel/drivers/misc/mic/host/mic_fops.c index 85776d732..8cc1d90cd 100644 --- a/kernel/drivers/misc/mic/host/mic_fops.c +++ b/kernel/drivers/misc/mic/host/mic_fops.c @@ -30,8 +30,8 @@ int mic_open(struct inode *inode, struct file *f) { struct mic_vdev *mvdev; - struct mic_device *mdev = container_of(inode->i_cdev, - struct mic_device, cdev); + struct mic_device *mdev = container_of(f->private_data, + struct mic_device, miscdev); mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL); if (!mvdev) diff --git a/kernel/drivers/misc/mic/host/mic_intr.c b/kernel/drivers/misc/mic/host/mic_intr.c index b4ca6c884..08ca3e372 100644 --- a/kernel/drivers/misc/mic/host/mic_intr.c +++ b/kernel/drivers/misc/mic/host/mic_intr.c @@ -30,8 +30,7 @@ static irqreturn_t mic_thread_fn(int irq, void *dev) struct mic_intr_info *intr_info = mdev->intr_info; struct mic_irq_info *irq_info = &mdev->irq_info; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; int i; spin_lock(&irq_info->mic_thread_lock); @@ -57,8 +56,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev) struct mic_intr_info *intr_info = mdev->intr_info; struct mic_irq_info *irq_info = &mdev->irq_info; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; u32 mask; int i; @@ -83,7 +81,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev) /* Return the interrupt offset from the index. Index is 0 based. */ static u16 mic_map_src_to_offset(struct mic_device *mdev, - int intr_src, enum mic_intr_type type) + int intr_src, enum mic_intr_type type) { if (type >= MIC_NUM_INTR_TYPES) return MIC_NUM_OFFSETS; @@ -214,7 +212,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) mdev->irq_info.msix_entries[i].entry = i; rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries, - MIC_MIN_MSIX); + MIC_MIN_MSIX); if (rc) { dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc); goto err_enable_msix; @@ -229,7 +227,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) goto err_nomem2; } - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "%d MSIx irqs setup\n", mdev->irq_info.num_vectors); return 0; err_nomem2: @@ -281,7 +279,6 @@ static void mic_release_callbacks(struct mic_device *mdev) spin_lock(&mdev->irq_info.mic_thread_lock); spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); for (i = 0; i < MIC_NUM_OFFSETS; i++) { - if (list_empty(&mdev->irq_info.cb_list[i])) break; @@ -443,12 +440,11 @@ mic_request_threaded_irq(struct mic_device *mdev, unsigned long cookie = 0; u16 entry; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; offset = mic_map_src_to_offset(mdev, intr_src, type); if (offset >= MIC_NUM_OFFSETS) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "Error mapping index %d to a valid source id.\n", intr_src); rc = -EINVAL; @@ -458,7 +454,7 @@ mic_request_threaded_irq(struct mic_device *mdev, if (mdev->irq_info.num_vectors > 1) { msix = mic_get_available_vector(mdev); if (!msix) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "No MSIx vectors available for use.\n"); rc = -ENOSPC; goto err; @@ -467,7 +463,7 @@ mic_request_threaded_irq(struct mic_device *mdev, rc = request_threaded_irq(msix->vector, handler, thread_fn, 0, name, data); if (rc) { - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "request irq failed rc = %d\n", rc); goto err; } @@ -476,13 +472,13 @@ mic_request_threaded_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, offset, true); cookie = MK_COOKIE(entry, offset); - dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n", + dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n", msix->vector, intr_src); } else { intr_cb = mic_register_intr_callback(mdev, offset, handler, thread_fn, data); if (IS_ERR(intr_cb)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "No available callback entries for use\n"); rc = PTR_ERR(intr_cb); goto err; @@ -495,7 +491,7 @@ mic_request_threaded_irq(struct mic_device *mdev, entry, offset, true); } cookie = MK_COOKIE(entry, intr_cb->cb_id); - dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n", + dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n", intr_cb->cb_id, intr_src); } return (struct mic_irq *)cookie; @@ -515,20 +511,19 @@ err: * returns: none. */ void mic_free_irq(struct mic_device *mdev, - struct mic_irq *cookie, void *data) + struct mic_irq *cookie, void *data) { u32 offset; u32 entry; u8 src_id; unsigned int irq; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; entry = GET_ENTRY((unsigned long)cookie); offset = GET_OFFSET((unsigned long)cookie); if (mdev->irq_info.num_vectors > 1) { if (entry >= mdev->irq_info.num_vectors) { - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "entry %d should be < num_irq %d\n", entry, mdev->irq_info.num_vectors); return; @@ -539,12 +534,12 @@ void mic_free_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, offset, false); - dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq); + dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq); } else { irq = pdev->irq; src_id = mic_unregister_intr_callback(mdev, offset); if (src_id >= MIC_NUM_OFFSETS) { - dev_warn(mdev->sdev->parent, "Error unregistering callback\n"); + dev_warn(&mdev->pdev->dev, "Error unregistering callback\n"); return; } if (pci_dev_msi_enabled(pdev)) { @@ -552,7 +547,7 @@ void mic_free_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, src_id, false); } - dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n", + dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n", offset, src_id); } } @@ -579,7 +574,7 @@ int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev) rc = mic_setup_intx(mdev, pdev); if (rc) { - dev_err(mdev->sdev->parent, "no usable interrupts\n"); + dev_err(&mdev->pdev->dev, "no usable interrupts\n"); return rc; } done: @@ -635,8 +630,7 @@ void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev) void mic_intr_restore(struct mic_device *mdev) { int entry, offset; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; if (!pci_dev_msi_enabled(pdev)) return; diff --git a/kernel/drivers/misc/mic/host/mic_intr.h b/kernel/drivers/misc/mic/host/mic_intr.h index 9f783d4ad..cce28824d 100644 --- a/kernel/drivers/misc/mic/host/mic_intr.h +++ b/kernel/drivers/misc/mic/host/mic_intr.h @@ -28,8 +28,9 @@ * 3 for virtio network, console and block devices. * 1 for card shutdown notifications. * 4 for host owned DMA channels. + * 1 for SCIF */ -#define MIC_MIN_MSIX 8 +#define MIC_MIN_MSIX 9 #define MIC_NUM_OFFSETS 32 /** diff --git a/kernel/drivers/misc/mic/host/mic_main.c b/kernel/drivers/misc/mic/host/mic_main.c index ab37a3117..153894e7e 100644 --- a/kernel/drivers/misc/mic/host/mic_main.c +++ b/kernel/drivers/misc/mic/host/mic_main.c @@ -16,17 +16,11 @@ * the file called "COPYING". * * Intel MIC Host driver. - * - * Global TODO's across the driver to be added after initial base - * patches are accepted upstream: - * 1) Enable DMA support. - * 2) Enable per vring interrupt support. */ #include <linux/fs.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/poll.h> -#include <linux/suspend.h> #include <linux/mic_common.h> #include "../common/mic_dev.h" @@ -63,8 +57,6 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl); /* ID allocator for MIC devices */ static struct ida g_mic_ida; -/* Class of MIC devices for sysfs accessibility. */ -static struct class *g_mic_class; /* Base device node number for MIC devices */ static dev_t g_mic_devno; @@ -81,17 +73,14 @@ static const struct file_operations mic_fops = { static int mic_dp_init(struct mic_device *mdev) { mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL); - if (!mdev->dp) { - dev_err(mdev->sdev->parent, "%s %d err %d\n", - __func__, __LINE__, -ENOMEM); + if (!mdev->dp) return -ENOMEM; - } mdev->dp_dma_addr = mic_map_single(mdev, mdev->dp, MIC_DP_SIZE); if (mic_map_error(mdev->dp_dma_addr)) { kfree(mdev->dp); - dev_err(mdev->sdev->parent, "%s %d err %d\n", + dev_err(&mdev->pdev->dev, "%s %d err %d\n", __func__, __LINE__, -ENOMEM); return -ENOMEM; } @@ -108,30 +97,6 @@ static void mic_dp_uninit(struct mic_device *mdev) } /** - * mic_shutdown_db - Shutdown doorbell interrupt handler. - */ -static irqreturn_t mic_shutdown_db(int irq, void *data) -{ - struct mic_device *mdev = data; - struct mic_bootparam *bootparam = mdev->dp; - - mdev->ops->intr_workarounds(mdev); - - switch (bootparam->shutdown_status) { - case MIC_HALTED: - case MIC_POWER_OFF: - case MIC_RESTART: - /* Fall through */ - case MIC_CRASHED: - schedule_work(&mdev->shutdown_work); - break; - default: - break; - }; - return IRQ_HANDLED; -} - -/** * mic_ops_init: Initialize HW specific operation tables. * * @mdev: pointer to mic_device instance @@ -188,43 +153,6 @@ static enum mic_hw_family mic_get_family(struct pci_dev *pdev) } /** -* mic_pm_notifier: Notifier callback function that handles -* PM notifications. -* -* @notifier_block: The notifier structure. -* @pm_event: The event for which the driver was notified. -* @unused: Meaningless. Always NULL. -* -* returns NOTIFY_DONE -*/ -static int mic_pm_notifier(struct notifier_block *notifier, - unsigned long pm_event, void *unused) -{ - struct mic_device *mdev = container_of(notifier, - struct mic_device, pm_notifier); - - switch (pm_event) { - case PM_HIBERNATION_PREPARE: - /* Fall through */ - case PM_SUSPEND_PREPARE: - mic_prepare_suspend(mdev); - break; - case PM_POST_HIBERNATION: - /* Fall through */ - case PM_POST_SUSPEND: - /* Fall through */ - case PM_POST_RESTORE: - mic_complete_resume(mdev); - break; - case PM_RESTORE_PREPARE: - break; - default: - break; - } - return NOTIFY_DONE; -} - -/** * mic_device_init - Allocates and initializes the MIC device structure * * @mdev: pointer to mic_device instance @@ -232,52 +160,16 @@ static int mic_pm_notifier(struct notifier_block *notifier, * * returns none. */ -static int +static void mic_device_init(struct mic_device *mdev, struct pci_dev *pdev) { - int rc; - + mdev->pdev = pdev; mdev->family = mic_get_family(pdev); mdev->stepping = pdev->revision; mic_ops_init(mdev); - mic_sysfs_init(mdev); mutex_init(&mdev->mic_mutex); mdev->irq_info.next_avail_src = 0; - INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work); - INIT_WORK(&mdev->shutdown_work, mic_shutdown_work); - init_completion(&mdev->reset_wait); INIT_LIST_HEAD(&mdev->vdev_list); - mdev->pm_notifier.notifier_call = mic_pm_notifier; - rc = register_pm_notifier(&mdev->pm_notifier); - if (rc) { - dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n", - rc); - goto register_pm_notifier_fail; - } - return 0; -register_pm_notifier_fail: - flush_work(&mdev->shutdown_work); - flush_work(&mdev->reset_trigger_work); - return rc; -} - -/** - * mic_device_uninit - Frees resources allocated during mic_device_init(..) - * - * @mdev: pointer to mic_device instance - * - * returns none - */ -static void mic_device_uninit(struct mic_device *mdev) -{ - /* The cmdline sysfs entry might have allocated cmdline */ - kfree(mdev->cmdline); - kfree(mdev->firmware); - kfree(mdev->ramdisk); - kfree(mdev->bootmode); - flush_work(&mdev->reset_trigger_work); - flush_work(&mdev->shutdown_work); - unregister_pm_notifier(&mdev->pm_notifier); } /** @@ -289,7 +181,7 @@ static void mic_device_uninit(struct mic_device *mdev) * returns 0 on success, < 0 on failure. */ static int mic_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; struct mic_device *mdev; @@ -307,16 +199,12 @@ static int mic_probe(struct pci_dev *pdev, goto ida_fail; } - rc = mic_device_init(mdev, pdev); - if (rc) { - dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc); - goto device_init_fail; - } + mic_device_init(mdev, pdev); rc = pci_enable_device(pdev); if (rc) { dev_err(&pdev->dev, "failed to enable pci device.\n"); - goto uninit_device; + goto ida_remove; } pci_set_master(pdev); @@ -365,61 +253,39 @@ static int mic_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, mdev); - mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev, - MKDEV(MAJOR(g_mic_devno), mdev->id), NULL, - mdev->attr_group, "mic%d", mdev->id); - if (IS_ERR(mdev->sdev)) { - rc = PTR_ERR(mdev->sdev); - dev_err(&pdev->dev, - "device_create_with_groups failed rc %d\n", rc); - goto smpt_uninit; - } - mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state"); - if (!mdev->state_sysfs) { - rc = -ENODEV; - dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc); - goto destroy_device; - } - rc = mic_dp_init(mdev); if (rc) { dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc); - goto sysfs_put; - } - mutex_lock(&mdev->mic_mutex); - - mdev->shutdown_db = mic_next_db(mdev); - mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db, - NULL, "shutdown-interrupt", mdev, - mdev->shutdown_db, MIC_INTR_DB); - if (IS_ERR(mdev->shutdown_cookie)) { - rc = PTR_ERR(mdev->shutdown_cookie); - mutex_unlock(&mdev->mic_mutex); - goto dp_uninit; + goto smpt_uninit; } - mutex_unlock(&mdev->mic_mutex); mic_bootparam_init(mdev); mic_create_debug_dir(mdev); - cdev_init(&mdev->cdev, &mic_fops); - mdev->cdev.owner = THIS_MODULE; - rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1); + + mdev->miscdev.minor = MISC_DYNAMIC_MINOR; + snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id); + mdev->miscdev.name = mdev->name; + mdev->miscdev.fops = &mic_fops; + mdev->miscdev.parent = &mdev->pdev->dev; + rc = misc_register(&mdev->miscdev); if (rc) { - dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc); + dev_err(&pdev->dev, "misc_register err id %d rc %d\n", + mdev->id, rc); goto cleanup_debug_dir; } + + mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops); + if (IS_ERR(mdev->cosm_dev)) { + rc = PTR_ERR(mdev->cosm_dev); + dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc); + goto misc_dereg; + } return 0; +misc_dereg: + misc_deregister(&mdev->miscdev); cleanup_debug_dir: mic_delete_debug_dir(mdev); - mutex_lock(&mdev->mic_mutex); - mic_free_irq(mdev, mdev->shutdown_cookie, mdev); - mutex_unlock(&mdev->mic_mutex); -dp_uninit: mic_dp_uninit(mdev); -sysfs_put: - sysfs_put(mdev->state_sysfs); -destroy_device: - device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); smpt_uninit: mic_smpt_uninit(mdev); free_interrupts: @@ -432,9 +298,7 @@ release_regions: pci_release_regions(pdev); disable_device: pci_disable_device(pdev); -uninit_device: - mic_device_uninit(mdev); -device_init_fail: +ida_remove: ida_simple_remove(&g_mic_ida, mdev->id); ida_fail: kfree(mdev); @@ -458,26 +322,20 @@ static void mic_remove(struct pci_dev *pdev) if (!mdev) return; - mic_stop(mdev, false); - cdev_del(&mdev->cdev); + cosm_unregister_device(mdev->cosm_dev); + misc_deregister(&mdev->miscdev); mic_delete_debug_dir(mdev); - mutex_lock(&mdev->mic_mutex); - mic_free_irq(mdev, mdev->shutdown_cookie, mdev); - mutex_unlock(&mdev->mic_mutex); - flush_work(&mdev->shutdown_work); mic_dp_uninit(mdev); - sysfs_put(mdev->state_sysfs); - device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); mic_smpt_uninit(mdev); mic_free_interrupts(mdev, pdev); - iounmap(mdev->mmio.va); iounmap(mdev->aper.va); - mic_device_uninit(mdev); + iounmap(mdev->mmio.va); pci_release_regions(pdev); pci_disable_device(pdev); ida_simple_remove(&g_mic_ida, mdev->id); kfree(mdev); } + static struct pci_driver mic_driver = { .name = mic_driver_name, .id_table = mic_pci_tbl, @@ -490,31 +348,23 @@ static int __init mic_init(void) int ret; ret = alloc_chrdev_region(&g_mic_devno, 0, - MIC_MAX_NUM_DEVS, mic_driver_name); + MIC_MAX_NUM_DEVS, mic_driver_name); if (ret) { pr_err("alloc_chrdev_region failed ret %d\n", ret); goto error; } - g_mic_class = class_create(THIS_MODULE, mic_driver_name); - if (IS_ERR(g_mic_class)) { - ret = PTR_ERR(g_mic_class); - pr_err("class_create failed ret %d\n", ret); - goto cleanup_chrdev; - } - mic_init_debugfs(); ida_init(&g_mic_ida); ret = pci_register_driver(&mic_driver); if (ret) { pr_err("pci_register_driver failed ret %d\n", ret); - goto cleanup_debugfs; + goto cleanup_chrdev; } return ret; -cleanup_debugfs: - mic_exit_debugfs(); - class_destroy(g_mic_class); cleanup_chrdev: + ida_destroy(&g_mic_ida); + mic_exit_debugfs(); unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); error: return ret; @@ -525,7 +375,6 @@ static void __exit mic_exit(void) pci_unregister_driver(&mic_driver); ida_destroy(&g_mic_ida); mic_exit_debugfs(); - class_destroy(g_mic_class); unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); } diff --git a/kernel/drivers/misc/mic/host/mic_smpt.c b/kernel/drivers/misc/mic/host/mic_smpt.c index fae474c48..c3f958580 100644 --- a/kernel/drivers/misc/mic/host/mic_smpt.c +++ b/kernel/drivers/misc/mic/host/mic_smpt.c @@ -76,7 +76,7 @@ mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa) /* Populate an SMPT entry and update the reference counts. */ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, - int entries, struct mic_device *mdev) + int entries, struct mic_device *mdev) { struct mic_smpt_info *smpt_info = mdev->smpt; int i; @@ -97,7 +97,7 @@ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, * for a given DMA address and size. */ static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr, - int entries, s64 *ref, size_t size) + int entries, s64 *ref, size_t size) { int spt; int ae = 0; @@ -148,7 +148,7 @@ found: * and the starting smpt address */ static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, - size_t size, s64 *ref, u64 *smpt_start) + size_t size, s64 *ref, u64 *smpt_start) { u64 start = dma_addr; u64 end = dma_addr + size; @@ -174,15 +174,14 @@ static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, * * returns a DMA address. */ -static dma_addr_t -mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr) +dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr) { struct mic_smpt_info *smpt_info = mdev->smpt; int spt; dma_addr_t dma_addr; if (!mic_is_system_addr(mdev, mic_addr)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr); return -EINVAL; } @@ -214,12 +213,12 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) if (!size || size > mic_max_system_memory(mdev)) return mic_addr; - ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL); + ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC); if (!ref) return mic_addr; num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size, - ref, &smpt_start); + ref, &smpt_start); /* Set the smpt table appropriately and get 16G aligned mic address */ mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size); @@ -232,7 +231,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) * else generate mic_addr by adding the 16G offset in dma_addr */ if (!mic_addr && MIC_FAMILY_X100 == mdev->family) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_map failed dma_addr 0x%llx size 0x%lx\n", dma_addr, size); return mic_addr; @@ -265,13 +264,13 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) return; if (!mic_is_system_addr(mdev, mic_addr)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "invalid address: 0x%llx\n", mic_addr); return; } spt = mic_sys_addr_to_smpt(mdev, mic_addr); - ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL); + ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC); if (!ref) return; @@ -285,7 +284,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) for (i = spt; i < spt + num_smpt; i++) { smpt_info->entry[i].ref_count -= ref[i - spt]; if (smpt_info->entry[i].ref_count < 0) - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "ref count for entry %d is negative\n", i); } spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); @@ -308,15 +307,14 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) { dma_addr_t mic_addr = 0; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; dma_addr_t dma_addr = pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL); if (!pci_dma_mapping_error(pdev, dma_addr)) { mic_addr = mic_map(mdev, dma_addr, size); if (!mic_addr) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_map failed dma_addr 0x%llx size 0x%lx\n", dma_addr, size); pci_unmap_single(pdev, dma_addr, @@ -340,8 +338,7 @@ dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) void mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) { - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr); mic_unmap(mdev, mic_addr, size); pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); @@ -400,18 +397,18 @@ void mic_smpt_uninit(struct mic_device *mdev) struct mic_smpt_info *smpt_info = mdev->smpt; int i; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "nodeid %d SMPT ref count %lld map %lld unmap %lld\n", mdev->id, smpt_info->ref_count, smpt_info->map_count, smpt_info->unmap_count); for (i = 0; i < smpt_info->info.num_reg; i++) { - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n", i, smpt_info->entry[i].dma_addr, smpt_info->entry[i].ref_count); if (smpt_info->entry[i].ref_count) - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "ref count for entry %d is not zero\n", i); } kfree(smpt_info->entry); diff --git a/kernel/drivers/misc/mic/host/mic_smpt.h b/kernel/drivers/misc/mic/host/mic_smpt.h index 51970abfe..68721c6e7 100644 --- a/kernel/drivers/misc/mic/host/mic_smpt.h +++ b/kernel/drivers/misc/mic/host/mic_smpt.h @@ -78,6 +78,7 @@ void mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size); void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size); +dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr); /** * mic_map_error - Check a MIC address for errors. diff --git a/kernel/drivers/misc/mic/host/mic_sysfs.c b/kernel/drivers/misc/mic/host/mic_sysfs.c deleted file mode 100644 index 6dd864e4a..000000000 --- a/kernel/drivers/misc/mic/host/mic_sysfs.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC Host driver. - * - */ -#include <linux/pci.h> - -#include <linux/mic_common.h> -#include "../common/mic_dev.h" -#include "mic_device.h" - -/* - * A state-to-string lookup table, for exposing a human readable state - * via sysfs. Always keep in sync with enum mic_states - */ -static const char * const mic_state_string[] = { - [MIC_OFFLINE] = "offline", - [MIC_ONLINE] = "online", - [MIC_SHUTTING_DOWN] = "shutting_down", - [MIC_RESET_FAILED] = "reset_failed", - [MIC_SUSPENDING] = "suspending", - [MIC_SUSPENDED] = "suspended", -}; - -/* - * A shutdown-status-to-string lookup table, for exposing a human - * readable state via sysfs. Always keep in sync with enum mic_shutdown_status - */ -static const char * const mic_shutdown_status_string[] = { - [MIC_NOP] = "nop", - [MIC_CRASHED] = "crashed", - [MIC_HALTED] = "halted", - [MIC_POWER_OFF] = "poweroff", - [MIC_RESTART] = "restart", -}; - -void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status) -{ - dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n", - mic_shutdown_status_string[mdev->shutdown_status], - mic_shutdown_status_string[shutdown_status]); - mdev->shutdown_status = shutdown_status; -} - -void mic_set_state(struct mic_device *mdev, u8 state) -{ - dev_dbg(mdev->sdev->parent, "State %s -> %s\n", - mic_state_string[mdev->state], - mic_state_string[state]); - mdev->state = state; - sysfs_notify_dirent(mdev->state_sysfs); -} - -static ssize_t -family_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - static const char x100[] = "x100"; - static const char unknown[] = "Unknown"; - const char *card = NULL; - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - switch (mdev->family) { - case MIC_FAMILY_X100: - card = x100; - break; - default: - card = unknown; - break; - } - return scnprintf(buf, PAGE_SIZE, "%s\n", card); -} -static DEVICE_ATTR_RO(family); - -static ssize_t -stepping_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *string = "??"; - - if (!mdev) - return -EINVAL; - - switch (mdev->stepping) { - case MIC_A0_STEP: - string = "A0"; - break; - case MIC_B0_STEP: - string = "B0"; - break; - case MIC_B1_STEP: - string = "B1"; - break; - case MIC_C0_STEP: - string = "C0"; - break; - default: - break; - } - return scnprintf(buf, PAGE_SIZE, "%s\n", string); -} -static DEVICE_ATTR_RO(stepping); - -static ssize_t -state_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev || mdev->state >= MIC_LAST) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%s\n", - mic_state_string[mdev->state]); -} - -static ssize_t -state_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - int rc = 0; - struct mic_device *mdev = dev_get_drvdata(dev->parent); - if (!mdev) - return -EINVAL; - if (sysfs_streq(buf, "boot")) { - rc = mic_start(mdev, buf); - if (rc) { - dev_err(mdev->sdev->parent, - "mic_boot failed rc %d\n", rc); - count = rc; - } - goto done; - } - - if (sysfs_streq(buf, "reset")) { - schedule_work(&mdev->reset_trigger_work); - goto done; - } - - if (sysfs_streq(buf, "shutdown")) { - mic_shutdown(mdev); - goto done; - } - - if (sysfs_streq(buf, "suspend")) { - mic_suspend(mdev); - goto done; - } - - count = -EINVAL; -done: - return count; -} -static DEVICE_ATTR_RW(state); - -static ssize_t shutdown_status_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%s\n", - mic_shutdown_status_string[mdev->shutdown_status]); -} -static DEVICE_ATTR_RO(shutdown_status); - -static ssize_t -cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *cmdline; - - if (!mdev) - return -EINVAL; - - cmdline = mdev->cmdline; - - if (cmdline) - return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); - return 0; -} - -static ssize_t -cmdline_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->cmdline); - - mdev->cmdline = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->cmdline) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->cmdline, buf, count); - - if (mdev->cmdline[count - 1] == '\n') - mdev->cmdline[count - 1] = '\0'; - else - mdev->cmdline[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(cmdline); - -static ssize_t -firmware_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *firmware; - - if (!mdev) - return -EINVAL; - - firmware = mdev->firmware; - - if (firmware) - return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); - return 0; -} - -static ssize_t -firmware_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->firmware); - - mdev->firmware = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->firmware) { - count = -ENOMEM; - goto unlock; - } - strncpy(mdev->firmware, buf, count); - - if (mdev->firmware[count - 1] == '\n') - mdev->firmware[count - 1] = '\0'; - else - mdev->firmware[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(firmware); - -static ssize_t -ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *ramdisk; - - if (!mdev) - return -EINVAL; - - ramdisk = mdev->ramdisk; - - if (ramdisk) - return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); - return 0; -} - -static ssize_t -ramdisk_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->ramdisk); - - mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->ramdisk) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->ramdisk, buf, count); - - if (mdev->ramdisk[count - 1] == '\n') - mdev->ramdisk[count - 1] = '\0'; - else - mdev->ramdisk[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(ramdisk); - -static ssize_t -bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *bootmode; - - if (!mdev) - return -EINVAL; - - bootmode = mdev->bootmode; - - if (bootmode) - return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); - return 0; -} - -static ssize_t -bootmode_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf")) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->bootmode); - - mdev->bootmode = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->bootmode) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->bootmode, buf, count); - - if (mdev->bootmode[count - 1] == '\n') - mdev->bootmode[count - 1] = '\0'; - else - mdev->bootmode[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(bootmode); - -static ssize_t -log_buf_addr_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr); -} - -static ssize_t -log_buf_addr_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - int ret; - unsigned long addr; - - if (!mdev) - return -EINVAL; - - ret = kstrtoul(buf, 16, &addr); - if (ret) - goto exit; - - mdev->log_buf_addr = (void *)addr; - ret = count; -exit: - return ret; -} -static DEVICE_ATTR_RW(log_buf_addr); - -static ssize_t -log_buf_len_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len); -} - -static ssize_t -log_buf_len_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - int ret; - unsigned long addr; - - if (!mdev) - return -EINVAL; - - ret = kstrtoul(buf, 16, &addr); - if (ret) - goto exit; - - mdev->log_buf_len = (int *)addr; - ret = count; -exit: - return ret; -} -static DEVICE_ATTR_RW(log_buf_len); - -static struct attribute *mic_default_attrs[] = { - &dev_attr_family.attr, - &dev_attr_stepping.attr, - &dev_attr_state.attr, - &dev_attr_shutdown_status.attr, - &dev_attr_cmdline.attr, - &dev_attr_firmware.attr, - &dev_attr_ramdisk.attr, - &dev_attr_bootmode.attr, - &dev_attr_log_buf_addr.attr, - &dev_attr_log_buf_len.attr, - - NULL -}; - -ATTRIBUTE_GROUPS(mic_default); - -void mic_sysfs_init(struct mic_device *mdev) -{ - mdev->attr_group = mic_default_groups; -} diff --git a/kernel/drivers/misc/mic/host/mic_virtio.c b/kernel/drivers/misc/mic/host/mic_virtio.c index a020e4eb4..58b107a24 100644 --- a/kernel/drivers/misc/mic/host/mic_virtio.c +++ b/kernel/drivers/misc/mic/host/mic_virtio.c @@ -23,7 +23,6 @@ #include <linux/uaccess.h> #include <linux/dmaengine.h> #include <linux/mic_common.h> - #include "../common/mic_dev.h" #include "mic_device.h" #include "mic_smpt.h" @@ -40,7 +39,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst, { int err = 0; struct dma_async_tx_descriptor *tx; - struct dma_chan *mic_ch = mdev->dma_ch; + struct dma_chan *mic_ch = mdev->dma_ch[0]; if (!mic_ch) { err = -EBUSY; @@ -62,7 +61,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst, } error: if (err) - dev_err(mdev->sdev->parent, "%s %d err %d\n", + dev_err(&mdev->pdev->dev, "%s %d err %d\n", __func__, __LINE__, err); return err; } @@ -80,7 +79,7 @@ static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf, struct mic_device *mdev = mvdev->mdev; void __iomem *dbuf = mdev->aper.va + daddr; struct mic_vringh *mvr = &mvdev->mvr[vr_idx]; - size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align; + size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align; size_t dma_offset; size_t partlen; int err; @@ -129,7 +128,7 @@ static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf, struct mic_device *mdev = mvdev->mdev; void __iomem *dbuf = mdev->aper.va + daddr; struct mic_vringh *mvr = &mvdev->mvr[vr_idx]; - size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align; + size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align; size_t partlen; int err; @@ -440,7 +439,7 @@ void mic_virtio_reset_devices(struct mic_device *mdev) struct list_head *pos, *tmp; struct mic_vdev *mvdev; - dev_dbg(mdev->sdev->parent, "%s\n", __func__); + dev_dbg(&mdev->pdev->dev, "%s\n", __func__); list_for_each_safe(pos, tmp, &mdev->vdev_list) { mvdev = list_entry(pos, struct mic_vdev, list); @@ -686,7 +685,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, mvr->head = USHRT_MAX; mvr->mvdev = mvdev; mvr->vrh.notify = mic_notify; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "%s %d index %d va %p info %p vr_size 0x%x\n", __func__, __LINE__, i, vr->va, vr->info, vr_size); mvr->buf = (void *)__get_free_pages(GFP_KERNEL, @@ -704,7 +703,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, mvdev->virtio_db, MIC_INTR_DB); if (IS_ERR(mvdev->virtio_cookie)) { ret = PTR_ERR(mvdev->virtio_cookie); - dev_dbg(mdev->sdev->parent, "request irq failed\n"); + dev_dbg(&mdev->pdev->dev, "request irq failed\n"); goto err; } @@ -720,7 +719,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, smp_wmb(); dd->type = type; - dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type); + dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type); db = bootparam->h2c_config_db; if (db != -1) @@ -755,7 +754,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev) db = bootparam->h2c_config_db; if (db == -1) goto skip_hot_remove; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Requesting hot remove id %d\n", mvdev->virtio_id); mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; mdev->ops->send_intr(mdev, db); @@ -765,7 +764,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev) if (ret) break; } - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Device id %d config_change %d guest_ack %d retry %d\n", mvdev->virtio_id, mvdev->dc->config_change, mvdev->dc->guest_ack, retry); @@ -794,7 +793,7 @@ skip_hot_remove: tmp_mvdev = list_entry(pos, struct mic_vdev, list); if (tmp_mvdev == mvdev) { list_del(pos); - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Removing virtio device id %d\n", mvdev->virtio_id); break; diff --git a/kernel/drivers/misc/mic/host/mic_virtio.h b/kernel/drivers/misc/mic/host/mic_virtio.h index d574efb85..a80631f27 100644 --- a/kernel/drivers/misc/mic/host/mic_virtio.h +++ b/kernel/drivers/misc/mic/host/mic_virtio.h @@ -124,7 +124,7 @@ void mic_bh_handler(struct work_struct *work); /* Helper API to obtain the MIC PCIe device */ static inline struct device *mic_dev(struct mic_vdev *mvdev) { - return mvdev->mdev->sdev->parent; + return &mvdev->mdev->pdev->dev; } /* Helper API to check if a virtio device is initialized */ diff --git a/kernel/drivers/misc/mic/host/mic_x100.c b/kernel/drivers/misc/mic/host/mic_x100.c index b7a21e11d..8118ac48c 100644 --- a/kernel/drivers/misc/mic/host/mic_x100.c +++ b/kernel/drivers/misc/mic/host/mic_x100.c @@ -43,7 +43,7 @@ static void mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val) { - dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n", + dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n", val, idx); mic_mmio_write(&mdev->mmio, val, MIC_X100_SBOX_BASE_ADDRESS + @@ -66,7 +66,7 @@ mic_x100_read_spad(struct mic_device *mdev, unsigned int idx) MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SPAD0 + idx * 4); - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Reading 0x%x from scratch pad index %d\n", val, idx); return val; } @@ -126,7 +126,7 @@ static void mic_x100_disable_interrupts(struct mic_device *mdev) * @mdev: pointer to mic_device instance */ static void mic_x100_send_sbox_intr(struct mic_device *mdev, - int doorbell) + int doorbell) { struct mic_mw *mw = &mdev->mmio; u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; @@ -147,7 +147,7 @@ static void mic_x100_send_sbox_intr(struct mic_device *mdev, * @mdev: pointer to mic_device instance */ static void mic_x100_send_rdmasr_intr(struct mic_device *mdev, - int doorbell) + int doorbell) { int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); /* Ensure that the interrupt is ordered w.r.t. previous stores. */ @@ -167,8 +167,7 @@ static void mic_x100_send_intr(struct mic_device *mdev, int doorbell) if (doorbell < MIC_X100_NUM_SBOX_IRQ) { mic_x100_send_sbox_intr(mdev, doorbell); } else { - rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ + - MIC_X100_RDMASR_IRQ_BASE; + rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ; mic_x100_send_rdmasr_intr(mdev, rdmasr_db); } } @@ -360,15 +359,14 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw) boot_mem = mdev->aper.len >> 20; buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL); - if (!buf) { - dev_err(mdev->sdev->parent, - "%s %d allocation failed\n", __func__, __LINE__); + if (!buf) return -ENOMEM; - } + len += snprintf(buf, CMDLINE_SIZE - len, " mem=%dM", boot_mem); - if (mdev->cmdline) - snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline); + if (mdev->cosm_dev->cmdline) + snprintf(buf + len, CMDLINE_SIZE - len, " %s", + mdev->cosm_dev->cmdline); memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); kfree(buf); return 0; @@ -387,12 +385,11 @@ mic_x100_load_ramdisk(struct mic_device *mdev) int rc; struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr; - rc = request_firmware(&fw, - mdev->ramdisk, mdev->sdev->parent); + rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev); if (rc < 0) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "ramdisk request_firmware failed: %d %s\n", - rc, mdev->ramdisk); + rc, mdev->cosm_dev->ramdisk); goto error; } /* @@ -424,10 +421,10 @@ mic_x100_get_boot_addr(struct mic_device *mdev) scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2); - dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n", + dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n", __func__, __LINE__, boot_addr); if (boot_addr > (1 << 31)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "incorrect bootaddr 0x%x\n", boot_addr); rc = -EINVAL; @@ -455,37 +452,37 @@ mic_x100_load_firmware(struct mic_device *mdev, const char *buf) if (rc) goto error; /* load OS */ - rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent); + rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev); if (rc < 0) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "ramdisk request_firmware failed: %d %s\n", - rc, mdev->firmware); + rc, mdev->cosm_dev->firmware); goto error; } if (mdev->bootaddr > mdev->aper.len - fw->size) { rc = -EINVAL; - dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n", + dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n", __func__, __LINE__, rc, mdev->bootaddr); release_firmware(fw); goto error; } memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size); mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size); - if (!strcmp(mdev->bootmode, "elf")) + if (!strcmp(mdev->cosm_dev->bootmode, "flash")) goto done; /* load command line */ rc = mic_x100_load_command_line(mdev, fw); if (rc) { - dev_err(mdev->sdev->parent, "%s %d rc %d\n", + dev_err(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc); goto error; } release_firmware(fw); /* load ramdisk */ - if (mdev->ramdisk) + if (mdev->cosm_dev->ramdisk) rc = mic_x100_load_ramdisk(mdev); error: - dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc); + dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc); done: return rc; } diff --git a/kernel/drivers/misc/mic/scif/Makefile b/kernel/drivers/misc/mic/scif/Makefile new file mode 100644 index 000000000..29cfc3e51 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/Makefile @@ -0,0 +1,20 @@ +# +# Makefile - SCIF driver. +# Copyright(c) 2014, Intel Corporation. +# +obj-$(CONFIG_SCIF) += scif.o +scif-objs := scif_main.o +scif-objs += scif_peer_bus.o +scif-objs += scif_ports.o +scif-objs += scif_debugfs.o +scif-objs += scif_fd.o +scif-objs += scif_api.o +scif-objs += scif_epd.o +scif-objs += scif_rb.o +scif-objs += scif_nodeqp.o +scif-objs += scif_nm.o +scif-objs += scif_dma.o +scif-objs += scif_fence.o +scif-objs += scif_mmap.o +scif-objs += scif_rma.o +scif-objs += scif_rma_list.o diff --git a/kernel/drivers/misc/mic/scif/scif_api.c b/kernel/drivers/misc/mic/scif/scif_api.c new file mode 100644 index 000000000..ddc9e4b08 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_api.c @@ -0,0 +1,1496 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/scif.h> +#include "scif_main.h" +#include "scif_map.h" + +static const char * const scif_ep_states[] = { + "Unbound", + "Bound", + "Listening", + "Connected", + "Connecting", + "Mapping", + "Closing", + "Close Listening", + "Disconnected", + "Zombie"}; + +enum conn_async_state { + ASYNC_CONN_IDLE = 1, /* ep setup for async connect */ + ASYNC_CONN_INPROGRESS, /* async connect in progress */ + ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ +}; + +/* + * File operations for anonymous inode file associated with a SCIF endpoint, + * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the + * poll API in the kernel and these take in a struct file *. Since a struct + * file is not available to kernel mode SCIF, it uses an anonymous file for + * this purpose. + */ +const struct file_operations scif_anon_fops = { + .owner = THIS_MODULE, +}; + +scif_epd_t scif_open(void) +{ + struct scif_endpt *ep; + int err; + + might_sleep(); + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + goto err_ep_alloc; + + ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL); + if (!ep->qp_info.qp) + goto err_qp_alloc; + + err = scif_anon_inode_getfile(ep); + if (err) + goto err_anon_inode; + + spin_lock_init(&ep->lock); + mutex_init(&ep->sendlock); + mutex_init(&ep->recvlock); + + scif_rma_ep_init(ep); + ep->state = SCIFEP_UNBOUND; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI open: ep %p success\n", ep); + return ep; + +err_anon_inode: + kfree(ep->qp_info.qp); +err_qp_alloc: + kfree(ep); +err_ep_alloc: + return NULL; +} +EXPORT_SYMBOL_GPL(scif_open); + +/* + * scif_disconnect_ep - Disconnects the endpoint if found + * @epd: The end point returned from scif_open() + */ +static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep) +{ + struct scifmsg msg; + struct scif_endpt *fep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + int err; + + /* + * Wake up any threads blocked in send()/recv() before closing + * out the connection. Grabbing and releasing the send/recv lock + * will ensure that any blocked senders/receivers have exited for + * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after + * close. Ring 3 endpoints are not affected since close will not + * be called while there are IOCTLs executing. + */ + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + mutex_lock(&ep->sendlock); + mutex_unlock(&ep->sendlock); + mutex_lock(&ep->recvlock); + mutex_unlock(&ep->recvlock); + + /* Remove from the connected list */ + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + fep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + if (!fep) { + /* + * The other side has completed the disconnect before + * the end point can be removed from the list. Therefore + * the ep lock is not locked, traverse the disconnected + * list to find the endpoint and release the conn lock. + */ + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + return NULL; + } + + init_completion(&ep->discon); + msg.uop = SCIF_DISCNCT; + msg.src = ep->port; + msg.dst = ep->peer; + msg.payload[0] = (u64)ep; + msg.payload[1] = ep->remote_ep; + + err = scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); + + if (!err) + /* Wait for the remote node to respond with SCIF_DISCNT_ACK */ + wait_for_completion_timeout(&ep->discon, + SCIF_NODE_ALIVE_TIMEOUT); + return ep; +} + +int scif_close(scif_epd_t epd) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + enum scif_epd_state oldstate; + bool flush_conn; + + dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n", + ep, scif_ep_states[ep->state]); + might_sleep(); + spin_lock(&ep->lock); + flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS); + spin_unlock(&ep->lock); + + if (flush_conn) + flush_work(&scif_info.conn_work); + + spin_lock(&ep->lock); + oldstate = ep->state; + + ep->state = SCIFEP_CLOSING; + + switch (oldstate) { + case SCIFEP_ZOMBIE: + dev_err(scif_info.mdev.this_device, + "SCIFAPI close: zombie state unexpected\n"); + case SCIFEP_DISCONNECTED: + spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); + /* Remove from the disconnected list */ + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + break; + case SCIFEP_UNBOUND: + case SCIFEP_BOUND: + case SCIFEP_CONNECTING: + spin_unlock(&ep->lock); + break; + case SCIFEP_MAPPING: + case SCIFEP_CONNECTED: + case SCIFEP_CLOSING: + { + spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); + scif_disconnect_ep(ep); + break; + } + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + { + struct scif_conreq *conreq; + struct scifmsg msg; + struct scif_endpt *aep; + + spin_unlock(&ep->lock); + mutex_lock(&scif_info.eplock); + + /* remove from listen list */ + list_for_each_safe(pos, tmpq, &scif_info.listen) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) + list_del(pos); + } + /* Remove any dangling accepts */ + while (ep->acceptcnt) { + aep = list_first_entry(&ep->li_accept, + struct scif_endpt, liacceptlist); + list_del(&aep->liacceptlist); + scif_put_port(aep->port.port); + list_for_each_safe(pos, tmpq, &scif_info.uaccept) { + tmpep = list_entry(pos, struct scif_endpt, + miacceptlist); + if (tmpep == aep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.eplock); + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, + struct scif_endpt, list); + if (tmpep == aep) { + list_del(pos); + break; + } + } + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, + struct scif_endpt, list); + if (tmpep == aep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + scif_teardown_ep(aep); + mutex_lock(&scif_info.eplock); + scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); + ep->acceptcnt--; + } + + spin_lock(&ep->lock); + mutex_unlock(&scif_info.eplock); + + /* Remove and reject any pending connection requests. */ + while (ep->conreqcnt) { + conreq = list_first_entry(&ep->conlist, + struct scif_conreq, list); + list_del(&conreq->list); + + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + /* + * No Error Handling on purpose for scif_nodeqp_send(). + * If the remote node is lost we still want free the + * connection requests on the self node. + */ + scif_nodeqp_send(&scif_dev[conreq->msg.src.node], + &msg); + ep->conreqcnt--; + kfree(conreq); + } + + spin_unlock(&ep->lock); + /* If a kSCIF accept is waiting wake it up */ + wake_up_interruptible(&ep->conwq); + break; + } + } + scif_put_port(ep->port.port); + scif_anon_inode_fput(ep); + scif_teardown_ep(ep); + scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD); + return 0; +} +EXPORT_SYMBOL_GPL(scif_close); + +/** + * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer + * accept new connections. + * @epd: The end point returned from scif_open() + */ +int __scif_flush(scif_epd_t epd) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + + switch (ep->state) { + case SCIFEP_LISTENING: + { + ep->state = SCIFEP_CLLISTEN; + + /* If an accept is waiting wake it up */ + wake_up_interruptible(&ep->conwq); + break; + } + default: + break; + } + return 0; +} + +int scif_bind(scif_epd_t epd, u16 pn) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret = 0; + int tmp; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI bind: ep %p %s requested port number %d\n", + ep, scif_ep_states[ep->state], pn); + if (pn) { + /* + * Similar to IETF RFC 1700, SCIF ports below + * SCIF_ADMIN_PORT_END can only be bound by system (or root) + * processes or by processes executed by privileged users. + */ + if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) { + ret = -EACCES; + goto scif_bind_admin_exit; + } + } + + spin_lock(&ep->lock); + if (ep->state == SCIFEP_BOUND) { + ret = -EINVAL; + goto scif_bind_exit; + } else if (ep->state != SCIFEP_UNBOUND) { + ret = -EISCONN; + goto scif_bind_exit; + } + + if (pn) { + tmp = scif_rsrv_port(pn); + if (tmp != pn) { + ret = -EINVAL; + goto scif_bind_exit; + } + } else { + pn = scif_get_new_port(); + if (!pn) { + ret = -ENOSPC; + goto scif_bind_exit; + } + } + + ep->state = SCIFEP_BOUND; + ep->port.node = scif_info.nodeid; + ep->port.port = pn; + ep->conn_async_state = ASYNC_CONN_IDLE; + ret = pn; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI bind: bound to port number %d\n", pn); +scif_bind_exit: + spin_unlock(&ep->lock); +scif_bind_admin_exit: + return ret; +} +EXPORT_SYMBOL_GPL(scif_bind); + +int scif_listen(scif_epd_t epd, int backlog) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]); + spin_lock(&ep->lock); + switch (ep->state) { + case SCIFEP_ZOMBIE: + case SCIFEP_CLOSING: + case SCIFEP_CLLISTEN: + case SCIFEP_UNBOUND: + case SCIFEP_DISCONNECTED: + spin_unlock(&ep->lock); + return -EINVAL; + case SCIFEP_LISTENING: + case SCIFEP_CONNECTED: + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + spin_unlock(&ep->lock); + return -EISCONN; + case SCIFEP_BOUND: + break; + } + + ep->state = SCIFEP_LISTENING; + ep->backlog = backlog; + + ep->conreqcnt = 0; + ep->acceptcnt = 0; + INIT_LIST_HEAD(&ep->conlist); + init_waitqueue_head(&ep->conwq); + INIT_LIST_HEAD(&ep->li_accept); + spin_unlock(&ep->lock); + + /* + * Listen status is complete so delete the qp information not needed + * on a listen before placing on the list of listening ep's + */ + scif_teardown_ep(ep); + ep->qp_info.qp = NULL; + + mutex_lock(&scif_info.eplock); + list_add_tail(&ep->list, &scif_info.listen); + mutex_unlock(&scif_info.eplock); + return 0; +} +EXPORT_SYMBOL_GPL(scif_listen); + +/* + ************************************************************************ + * SCIF connection flow: + * + * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF + * connections via a SCIF_CNCT_REQ message + * 2) A SCIF endpoint can initiate a SCIF connection by calling + * scif_connect(..) which calls scif_setup_qp_connect(..) which + * allocates the local qp for the endpoint ring buffer and then sends + * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or + * a SCIF_CNCT_REJ message + * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which + * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ + * message otherwise + * 4) A thread blocked waiting for incoming connections allocates its local + * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT + * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then + * the node sends a SCIF_CNCT_REJ message + * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the + * connecting endpoint is woken up as part of handling + * scif_cnctgnt_resp(..) following which it maps the remote endpoints' + * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on + * success or a SCIF_CNCT_GNTNACK message on failure and completes + * the scif_connect(..) API + * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked + * in step 4 is woken up and completes the scif_accept(..) API + * 7) The SCIF connection is now established between the two SCIF endpoints. + */ +static int scif_conn_func(struct scif_endpt *ep) +{ + int err = 0; + struct scifmsg msg; + struct device *spdev; + + err = scif_reserve_dma_chan(ep); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + /* Initiate the first part of the endpoint QP setup */ + err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, + SCIF_ENDPT_QP_SIZE, ep->remote_dev); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s err %d qp_offset 0x%llx\n", + __func__, err, ep->qp_info.qp_offset); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto cleanup_qp; + } + /* Format connect message and send it */ + msg.src = ep->port; + msg.dst = ep->conn_port; + msg.uop = SCIF_CNCT_REQ; + msg.payload[0] = (u64)ep; + msg.payload[1] = ep->qp_info.qp_offset; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + if (err) + goto connect_error_dec; + scif_put_peer_dev(spdev); + /* + * Wait for the remote node to respond with SCIF_CNCT_GNT or + * SCIF_CNCT_REJ message. + */ + err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d timeout\n", __func__, __LINE__); + ep->state = SCIFEP_BOUND; + } + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto cleanup_qp; + } + if (ep->state == SCIFEP_MAPPING) { + err = scif_setup_qp_connect_response(ep->remote_dev, + ep->qp_info.qp, + ep->qp_info.gnt_pld); + /* + * If the resource to map the queue are not available then + * we need to tell the other side to terminate the accept + */ + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + msg.uop = SCIF_CNCT_GNTNACK; + msg.payload[0] = ep->remote_ep; + _scif_nodeqp_send(ep->remote_dev, &msg); + ep->state = SCIFEP_BOUND; + goto connect_error_dec; + } + + msg.uop = SCIF_CNCT_GNTACK; + msg.payload[0] = ep->remote_ep; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + if (err) { + ep->state = SCIFEP_BOUND; + goto connect_error_dec; + } + ep->state = SCIFEP_CONNECTED; + mutex_lock(&scif_info.connlock); + list_add_tail(&ep->list, &scif_info.connected); + mutex_unlock(&scif_info.connlock); + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI connect: ep %p connected\n", ep); + } else if (ep->state == SCIFEP_BOUND) { + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI connect: ep %p connection refused\n", ep); + err = -ECONNREFUSED; + goto connect_error_dec; + } + scif_put_peer_dev(spdev); + return err; +connect_error_dec: + scif_put_peer_dev(spdev); +cleanup_qp: + scif_cleanup_ep_qp(ep); +connect_error_simple: + return err; +} + +/* + * scif_conn_handler: + * + * Workqueue handler for servicing non-blocking SCIF connect + * + */ +void scif_conn_handler(struct work_struct *work) +{ + struct scif_endpt *ep; + + do { + ep = NULL; + spin_lock(&scif_info.nb_connect_lock); + if (!list_empty(&scif_info.nb_connect_list)) { + ep = list_first_entry(&scif_info.nb_connect_list, + struct scif_endpt, conn_list); + list_del(&ep->conn_list); + } + spin_unlock(&scif_info.nb_connect_lock); + if (ep) { + ep->conn_err = scif_conn_func(ep); + wake_up_interruptible(&ep->conn_pend_wq); + } + } while (ep); +} + +int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + struct scif_dev *remote_dev; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep, + scif_ep_states[ep->state]); + + if (!scif_dev || dst->node > scif_info.maxid) + return -ENODEV; + + might_sleep(); + + remote_dev = &scif_dev[dst->node]; + spdev = scif_get_peer_dev(remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + + spin_lock(&ep->lock); + switch (ep->state) { + case SCIFEP_ZOMBIE: + case SCIFEP_CLOSING: + err = -EINVAL; + break; + case SCIFEP_DISCONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EINVAL; + break; + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + err = -EOPNOTSUPP; + break; + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + err = -EINPROGRESS; + else + err = -EISCONN; + break; + case SCIFEP_CONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EISCONN; + break; + case SCIFEP_UNBOUND: + ep->port.port = scif_get_new_port(); + if (!ep->port.port) { + err = -ENOSPC; + } else { + ep->port.node = scif_info.nodeid; + ep->conn_async_state = ASYNC_CONN_IDLE; + } + /* Fall through */ + case SCIFEP_BOUND: + /* + * If a non-blocking connect has been already initiated + * (conn_async_state is either ASYNC_CONN_INPROGRESS or + * ASYNC_CONN_FLUSH_WORK), the end point could end up in + * SCIF_BOUND due an error in the connection process + * (e.g., connection refused) If conn_async_state is + * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK + * so that the error status can be collected. If the state is + * already ASYNC_CONN_FLUSH_WORK - then set the error to + * EINPROGRESS since some other thread is waiting to collect + * error status. + */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) { + err = -EINPROGRESS; + } else { + ep->conn_port = *dst; + init_waitqueue_head(&ep->sendwq); + init_waitqueue_head(&ep->recvwq); + init_waitqueue_head(&ep->conwq); + ep->conn_async_state = 0; + + if (unlikely(non_block)) + ep->conn_async_state = ASYNC_CONN_INPROGRESS; + } + break; + } + + if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) + goto connect_simple_unlock1; + + ep->state = SCIFEP_CONNECTING; + ep->remote_dev = &scif_dev[dst->node]; + ep->qp_info.qp->magic = SCIFEP_MAGIC; + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + init_waitqueue_head(&ep->conn_pend_wq); + spin_lock(&scif_info.nb_connect_lock); + list_add_tail(&ep->conn_list, &scif_info.nb_connect_list); + spin_unlock(&scif_info.nb_connect_lock); + err = -EINPROGRESS; + schedule_work(&scif_info.conn_work); + } +connect_simple_unlock1: + spin_unlock(&ep->lock); + scif_put_peer_dev(spdev); + if (err) { + return err; + } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) { + flush_work(&scif_info.conn_work); + err = ep->conn_err; + spin_lock(&ep->lock); + ep->conn_async_state = ASYNC_CONN_IDLE; + spin_unlock(&ep->lock); + } else { + err = scif_conn_func(ep); + } + return err; +} + +int scif_connect(scif_epd_t epd, struct scif_port_id *dst) +{ + return __scif_connect(epd, dst, false); +} +EXPORT_SYMBOL_GPL(scif_connect); + +/** + * scif_accept() - Accept a connection request from the remote node + * + * The function accepts a connection request from the remote node. Successful + * complete is indicate by a new end point being created and passed back + * to the caller for future reference. + * + * Upon successful complete a zero will be returned and the peer information + * will be filled in. + * + * If the end point is not in the listening state -EINVAL will be returned. + * + * If during the connection sequence resource allocation fails the -ENOMEM + * will be returned. + * + * If the function is called with the ASYNC flag set and no connection requests + * are pending it will return -EAGAIN. + * + * If the remote side is not sending any connection requests the caller may + * terminate this function with a signal. If so a -EINTR will be returned. + */ +int scif_accept(scif_epd_t epd, struct scif_port_id *peer, + scif_epd_t *newepd, int flags) +{ + struct scif_endpt *lep = (struct scif_endpt *)epd; + struct scif_endpt *cep; + struct scif_conreq *conreq; + struct scifmsg msg; + int err; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]); + + if (flags & ~SCIF_ACCEPT_SYNC) + return -EINVAL; + + if (!peer || !newepd) + return -EINVAL; + + might_sleep(); + spin_lock(&lep->lock); + if (lep->state != SCIFEP_LISTENING) { + spin_unlock(&lep->lock); + return -EINVAL; + } + + if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) { + /* No connection request present and we do not want to wait */ + spin_unlock(&lep->lock); + return -EAGAIN; + } + + lep->files = current->files; +retry_connection: + spin_unlock(&lep->lock); + /* Wait for the remote node to send us a SCIF_CNCT_REQ */ + err = wait_event_interruptible(lep->conwq, + (lep->conreqcnt || + (lep->state != SCIFEP_LISTENING))); + if (err) + return err; + + if (lep->state != SCIFEP_LISTENING) + return -EINTR; + + spin_lock(&lep->lock); + + if (!lep->conreqcnt) + goto retry_connection; + + /* Get the first connect request off the list */ + conreq = list_first_entry(&lep->conlist, struct scif_conreq, list); + list_del(&conreq->list); + lep->conreqcnt--; + spin_unlock(&lep->lock); + + /* Fill in the peer information */ + peer->node = conreq->msg.src.node; + peer->port = conreq->msg.src.port; + + cep = kzalloc(sizeof(*cep), GFP_KERNEL); + if (!cep) { + err = -ENOMEM; + goto scif_accept_error_epalloc; + } + spin_lock_init(&cep->lock); + mutex_init(&cep->sendlock); + mutex_init(&cep->recvlock); + cep->state = SCIFEP_CONNECTING; + cep->remote_dev = &scif_dev[peer->node]; + cep->remote_ep = conreq->msg.payload[0]; + + scif_rma_ep_init(cep); + + err = scif_reserve_dma_chan(cep); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + + cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); + if (!cep->qp_info.qp) { + err = -ENOMEM; + goto scif_accept_error_qpalloc; + } + + err = scif_anon_inode_getfile(cep); + if (err) + goto scif_accept_error_anon_inode; + + cep->qp_info.qp->magic = SCIFEP_MAGIC; + spdev = scif_get_peer_dev(cep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto scif_accept_error_map; + } + err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset, + conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE, + cep->remote_dev); + if (err) { + dev_dbg(&cep->remote_dev->sdev->dev, + "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n", + lep, cep, err, cep->qp_info.qp_offset); + scif_put_peer_dev(spdev); + goto scif_accept_error_map; + } + + cep->port.node = lep->port.node; + cep->port.port = lep->port.port; + cep->peer.node = peer->node; + cep->peer.port = peer->port; + init_waitqueue_head(&cep->sendwq); + init_waitqueue_head(&cep->recvwq); + init_waitqueue_head(&cep->conwq); + + msg.uop = SCIF_CNCT_GNT; + msg.src = cep->port; + msg.payload[0] = cep->remote_ep; + msg.payload[1] = cep->qp_info.qp_offset; + msg.payload[2] = (u64)cep; + + err = _scif_nodeqp_send(cep->remote_dev, &msg); + scif_put_peer_dev(spdev); + if (err) + goto scif_accept_error_map; +retry: + /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */ + err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING, + SCIF_NODE_ACCEPT_TIMEOUT); + if (!err && scifdev_alive(cep)) + goto retry; + err = !err ? -ENODEV : 0; + if (err) + goto scif_accept_error_map; + kfree(conreq); + + spin_lock(&cep->lock); + + if (cep->state == SCIFEP_CLOSING) { + /* + * Remote failed to allocate resources and NAKed the grant. + * There is at this point nothing referencing the new end point. + */ + spin_unlock(&cep->lock); + scif_teardown_ep(cep); + kfree(cep); + + /* If call with sync flag then go back and wait. */ + if (flags & SCIF_ACCEPT_SYNC) { + spin_lock(&lep->lock); + goto retry_connection; + } + return -EAGAIN; + } + + scif_get_port(cep->port.port); + *newepd = (scif_epd_t)cep; + spin_unlock(&cep->lock); + return 0; +scif_accept_error_map: + scif_anon_inode_fput(cep); +scif_accept_error_anon_inode: + scif_teardown_ep(cep); +scif_accept_error_qpalloc: + kfree(cep); +scif_accept_error_epalloc: + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg); + kfree(conreq); + return err; +} +EXPORT_SYMBOL_GPL(scif_accept); + +/* + * scif_msg_param_check: + * @epd: The end point returned from scif_open() + * @len: Length to receive + * @flags: blocking or non blocking + * + * Validate parameters for messaging APIs scif_send(..)/scif_recv(..). + */ +static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags) +{ + int ret = -EINVAL; + + if (len < 0) + goto err_ret; + if (flags && (!(flags & SCIF_RECV_BLOCK))) + goto err_ret; + ret = 0; +err_ret: + return ret; +} + +static int _scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scifmsg notif_msg; + int curr_xfer_len = 0, sent_len = 0, write_count; + int ret = 0; + struct scif_qp *qp = ep->qp_info.qp; + + if (flags & SCIF_SEND_BLOCK) + might_sleep(); + + spin_lock(&ep->lock); + while (sent_len != len && SCIFEP_CONNECTED == ep->state) { + write_count = scif_rb_space(&qp->outbound_q); + if (write_count) { + /* Best effort to send as much data as possible */ + curr_xfer_len = min(len - sent_len, write_count); + ret = scif_rb_write(&qp->outbound_q, msg, + curr_xfer_len); + if (ret < 0) + break; + /* Success. Update write pointer */ + scif_rb_commit(&qp->outbound_q); + /* + * Send a notification to the peer about the + * produced data message. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_SENT; + notif_msg.payload[0] = ep->remote_ep; + ret = _scif_nodeqp_send(ep->remote_dev, ¬if_msg); + if (ret) + break; + sent_len += curr_xfer_len; + msg = msg + curr_xfer_len; + continue; + } + curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1); + /* Not enough RB space. return for the Non Blocking case */ + if (!(flags & SCIF_SEND_BLOCK)) + break; + + spin_unlock(&ep->lock); + /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */ + ret = + wait_event_interruptible(ep->sendwq, + (SCIFEP_CONNECTED != ep->state) || + (scif_rb_space(&qp->outbound_q) >= + curr_xfer_len)); + spin_lock(&ep->lock); + if (ret) + break; + } + if (sent_len) + ret = sent_len; + else if (!ret && SCIFEP_CONNECTED != ep->state) + ret = SCIFEP_DISCONNECTED == ep->state ? + -ECONNRESET : -ENOTCONN; + spin_unlock(&ep->lock); + return ret; +} + +static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + int read_size; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scifmsg notif_msg; + int curr_recv_len = 0, remaining_len = len, read_count; + int ret = 0; + struct scif_qp *qp = ep->qp_info.qp; + + if (flags & SCIF_RECV_BLOCK) + might_sleep(); + spin_lock(&ep->lock); + while (remaining_len && (SCIFEP_CONNECTED == ep->state || + SCIFEP_DISCONNECTED == ep->state)) { + read_count = scif_rb_count(&qp->inbound_q, remaining_len); + if (read_count) { + /* + * Best effort to recv as much data as there + * are bytes to read in the RB particularly + * important for the Non Blocking case. + */ + curr_recv_len = min(remaining_len, read_count); + read_size = scif_rb_get_next(&qp->inbound_q, + msg, curr_recv_len); + if (ep->state == SCIFEP_CONNECTED) { + /* + * Update the read pointer only if the endpoint + * is still connected else the read pointer + * might no longer exist since the peer has + * freed resources! + */ + scif_rb_update_read_ptr(&qp->inbound_q); + /* + * Send a notification to the peer about the + * consumed data message only if the EP is in + * SCIFEP_CONNECTED state. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_RCVD; + notif_msg.payload[0] = ep->remote_ep; + ret = _scif_nodeqp_send(ep->remote_dev, + ¬if_msg); + if (ret) + break; + } + remaining_len -= curr_recv_len; + msg = msg + curr_recv_len; + continue; + } + /* + * Bail out now if the EP is in SCIFEP_DISCONNECTED state else + * we will keep looping forever. + */ + if (ep->state == SCIFEP_DISCONNECTED) + break; + /* + * Return in the Non Blocking case if there is no data + * to read in this iteration. + */ + if (!(flags & SCIF_RECV_BLOCK)) + break; + curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1); + spin_unlock(&ep->lock); + /* + * Wait for a SCIF_CLIENT_SEND message in the blocking case + * or until other side disconnects. + */ + ret = + wait_event_interruptible(ep->recvwq, + SCIFEP_CONNECTED != ep->state || + scif_rb_count(&qp->inbound_q, + curr_recv_len) + >= curr_recv_len); + spin_lock(&ep->lock); + if (ret) + break; + } + if (len - remaining_len) + ret = len - remaining_len; + else if (!ret && ep->state != SCIFEP_CONNECTED) + ret = ep->state == SCIFEP_DISCONNECTED ? + -ECONNRESET : -ENOTCONN; + spin_unlock(&ep->lock); + return ret; +} + +/** + * scif_user_send() - Send data to connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_send(). + */ +int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + int sent_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1))); + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + err = scif_msg_param_check(epd, len, flags); + if (err) + goto send_err; + + tmp = kmalloc(chunk_len, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto send_err; + } + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->sendlock); + while (sent_len != len) { + loop_len = len - sent_len; + loop_len = min(chunk_len, loop_len); + if (copy_from_user(tmp, msg, loop_len)) { + err = -EFAULT; + goto send_free_err; + } + err = _scif_send(epd, tmp, loop_len, flags); + if (err < 0) + goto send_free_err; + sent_len += err; + msg += err; + if (err != loop_len) + goto send_free_err; + } +send_free_err: + mutex_unlock(&ep->sendlock); + kfree(tmp); +send_err: + return err < 0 ? err : sent_len; +} + +/** + * scif_user_recv() - Receive data from connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_recv(). + */ +int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + int recv_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1))); + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + err = scif_msg_param_check(epd, len, flags); + if (err) + goto recv_err; + + tmp = kmalloc(chunk_len, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto recv_err; + } + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->recvlock); + while (recv_len != len) { + loop_len = len - recv_len; + loop_len = min(chunk_len, loop_len); + err = _scif_recv(epd, tmp, loop_len, flags); + if (err < 0) + goto recv_free_err; + if (copy_to_user(msg, tmp, err)) { + err = -EFAULT; + goto recv_free_err; + } + recv_len += err; + msg += err; + if (err != loop_len) + goto recv_free_err; + } +recv_free_err: + mutex_unlock(&ep->recvlock); + kfree(tmp); +recv_err: + return err < 0 ? err : recv_len; +} + +/** + * scif_send() - Send data to connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_send(). + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + ret = scif_msg_param_check(epd, len, flags); + if (ret) + return ret; + if (!ep->remote_dev) + return -ENOTCONN; + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_SEND_BLOCK) + mutex_lock(&ep->sendlock); + + ret = _scif_send(epd, msg, len, flags); + + if (flags & SCIF_SEND_BLOCK) + mutex_unlock(&ep->sendlock); + return ret; +} +EXPORT_SYMBOL_GPL(scif_send); + +/** + * scif_recv() - Receive data from connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_recv(). + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + ret = scif_msg_param_check(epd, len, flags); + if (ret) + return ret; + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_RECV_BLOCK) + mutex_lock(&ep->recvlock); + + ret = _scif_recv(epd, msg, len, flags); + + if (flags & SCIF_RECV_BLOCK) + mutex_unlock(&ep->recvlock); + + return ret; +} +EXPORT_SYMBOL_GPL(scif_recv); + +static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq, + poll_table *p, struct scif_endpt *ep) +{ + /* + * Because poll_wait makes a GFP_KERNEL allocation, give up the lock + * and regrab it afterwards. Because the endpoint state might have + * changed while the lock was given up, the state must be checked + * again after re-acquiring the lock. The code in __scif_pollfd(..) + * does this. + */ + spin_unlock(&ep->lock); + poll_wait(f, wq, p); + spin_lock(&ep->lock); +} + +unsigned int +__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep) +{ + unsigned int mask = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]); + + spin_lock(&ep->lock); + + /* Endpoint is waiting for a non-blocking connect to complete */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep); + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED || + ep->conn_err) + mask |= POLLOUT; + goto exit; + } + } + + /* Endpoint is listening for incoming connection requests */ + if (ep->state == SCIFEP_LISTENING) { + _scif_poll_wait(f, &ep->conwq, wait, ep); + if (ep->state == SCIFEP_LISTENING) { + if (ep->conreqcnt) + mask |= POLLIN; + goto exit; + } + } + + /* Endpoint is connected or disconnected */ + if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) { + if (poll_requested_events(wait) & POLLIN) + _scif_poll_wait(f, &ep->recvwq, wait, ep); + if (poll_requested_events(wait) & POLLOUT) + _scif_poll_wait(f, &ep->sendwq, wait, ep); + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED) { + /* Data can be read without blocking */ + if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1)) + mask |= POLLIN; + /* Data can be written without blocking */ + if (scif_rb_space(&ep->qp_info.qp->outbound_q)) + mask |= POLLOUT; + /* Return POLLHUP if endpoint is disconnected */ + if (ep->state == SCIFEP_DISCONNECTED) + mask |= POLLHUP; + goto exit; + } + } + + /* Return POLLERR if the endpoint is in none of the above states */ + mask |= POLLERR; +exit: + spin_unlock(&ep->lock); + return mask; +} + +/** + * scif_poll() - Kernel mode SCIF poll + * @ufds: Array of scif_pollepd structures containing the end points + * and events to poll on + * @nfds: Size of the ufds array + * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout + * + * The code flow in this function is based on do_poll(..) in select.c + * + * Returns the number of endpoints which have pending events or 0 in + * the event of a timeout. If a signal is used for wake up, -EINTR is + * returned. + */ +int +scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs) +{ + struct poll_wqueues table; + poll_table *pt; + int i, mask, count = 0, timed_out = timeout_msecs == 0; + u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT + : msecs_to_jiffies(timeout_msecs); + + poll_initwait(&table); + pt = &table.pt; + while (1) { + for (i = 0; i < nfds; i++) { + pt->_key = ufds[i].events | POLLERR | POLLHUP; + mask = __scif_pollfd(ufds[i].epd->anon, + pt, ufds[i].epd); + mask &= ufds[i].events | POLLERR | POLLHUP; + if (mask) { + count++; + pt->_qproc = NULL; + } + ufds[i].revents = mask; + } + pt->_qproc = NULL; + if (!count) { + count = table.error; + if (signal_pending(current)) + count = -EINTR; + } + if (count || timed_out) + break; + + if (!schedule_timeout_interruptible(timeout)) + timed_out = 1; + } + poll_freewait(&table); + return count; +} +EXPORT_SYMBOL_GPL(scif_poll); + +int scif_get_node_ids(u16 *nodes, int len, u16 *self) +{ + int online = 0; + int offset = 0; + int node; + + if (!scif_is_mgmt_node()) + scif_get_node_info(); + + *self = scif_info.nodeid; + mutex_lock(&scif_info.conflock); + len = min_t(int, len, scif_info.total); + for (node = 0; node <= scif_info.maxid; node++) { + if (_scifdev_alive(&scif_dev[node])) { + online++; + if (offset < len) + nodes[offset++] = node; + } + } + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n", + scif_info.total, online, offset); + mutex_unlock(&scif_info.conflock); + + return online; +} +EXPORT_SYMBOL_GPL(scif_get_node_ids); + +static int scif_add_client_dev(struct device *dev, struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->probe) + client->probe(spdev); + return 0; +} + +static void scif_remove_client_dev(struct device *dev, + struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->remove) + client->remove(spdev); +} + +void scif_client_unregister(struct scif_client *client) +{ + subsys_interface_unregister(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_unregister); + +int scif_client_register(struct scif_client *client) +{ + struct subsys_interface *si = &client->si; + + si->name = client->name; + si->subsys = &scif_peer_bus; + si->add_dev = scif_add_client_dev; + si->remove_dev = scif_remove_client_dev; + + return subsys_interface_register(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_register); diff --git a/kernel/drivers/misc/mic/scif/scif_debugfs.c b/kernel/drivers/misc/mic/scif/scif_debugfs.c new file mode 100644 index 000000000..6884dad97 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_debugfs.c @@ -0,0 +1,162 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "../common/mic_dev.h" +#include "scif_main.h" + +/* Debugfs parent dir */ +static struct dentry *scif_dbg; + +static int scif_dev_test(struct seq_file *s, void *unused) +{ + int node; + + seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n", + scif_info.total, scif_info.nodeid, + scif_info.maxid); + + if (!scif_dev) + return 0; + + seq_printf(s, "%-16s\t%-16s\n", "node_id", "state"); + + for (node = 0; node <= scif_info.maxid; node++) + seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node, + _scifdev_alive(&scif_dev[node]) ? + "Running" : "Offline"); + return 0; +} + +static int scif_dev_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_dev_test, inode->i_private); +} + +static int scif_dev_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_dev_ops = { + .owner = THIS_MODULE, + .open = scif_dev_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_dev_test_release +}; + +static void scif_display_window(struct scif_window *window, struct seq_file *s) +{ + int j; + struct scatterlist *sg; + scif_pinned_pages_t pin = window->pinned_pages; + + seq_printf(s, "window %p type %d temp %d offset 0x%llx ", + window, window->type, window->temp, window->offset); + seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ", + window->nr_pages, window->nr_contig_chunks, window->prot); + seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ", + window->ref_count, window->magic, window->peer_window); + seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n", + window->unreg_state, window->va_for_temp); + + for (j = 0; j < window->nr_contig_chunks; j++) + seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j, + window->dma_addr[j], window->num_pages[j]); + + if (window->type == SCIF_WINDOW_SELF && pin) + for (j = 0; j < window->nr_pages; j++) + seq_printf(s, "page[%d] = pinned_pages %p address %p\n", + j, pin->pages[j], + page_address(pin->pages[j])); + + if (window->st) + for_each_sg(window->st->sgl, sg, window->st->nents, j) + seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n", + j, sg_dma_address(sg), sg_dma_len(sg)); +} + +static void scif_display_all_windows(struct list_head *head, struct seq_file *s) +{ + struct list_head *item; + struct scif_window *window; + + list_for_each(item, head) { + window = list_entry(item, struct scif_window, list); + scif_display_window(window, s); + } +} + +static int scif_rma_test(struct seq_file *s, void *unused) +{ + struct scif_endpt *ep; + struct list_head *pos; + + mutex_lock(&scif_info.connlock); + list_for_each(pos, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + seq_printf(s, "ep %p self windows\n", ep); + mutex_lock(&ep->rma_info.rma_lock); + scif_display_all_windows(&ep->rma_info.reg_list, s); + seq_printf(s, "ep %p remote windows\n", ep); + scif_display_all_windows(&ep->rma_info.remote_reg_list, s); + mutex_unlock(&ep->rma_info.rma_lock); + } + mutex_unlock(&scif_info.connlock); + return 0; +} + +static int scif_rma_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_test, inode->i_private); +} + +static int scif_rma_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_rma_ops = { + .owner = THIS_MODULE, + .open = scif_rma_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_rma_test_release +}; + +void __init scif_init_debugfs(void) +{ + scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!scif_dbg) { + dev_err(scif_info.mdev.this_device, + "can't create debugfs dir scif\n"); + return; + } + + debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); + debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); + debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); + debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); +} + +void scif_exit_debugfs(void) +{ + debugfs_remove_recursive(scif_dbg); +} diff --git a/kernel/drivers/misc/mic/scif/scif_dma.c b/kernel/drivers/misc/mic/scif/scif_dma.c new file mode 100644 index 000000000..95a13c629 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_dma.c @@ -0,0 +1,1979 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include "scif_map.h" + +/* + * struct scif_dma_comp_cb - SCIF DMA completion callback + * + * @dma_completion_func: DMA completion callback + * @cb_cookie: DMA completion callback cookie + * @temp_buf: Temporary buffer + * @temp_buf_to_free: Temporary buffer to be freed + * @is_cache: Is a kmem_cache allocated buffer + * @dst_offset: Destination registration offset + * @dst_window: Destination registration window + * @len: Length of the temp buffer + * @temp_phys: DMA address of the temp buffer + * @sdev: The SCIF device + * @header_padding: padding for cache line alignment + */ +struct scif_dma_comp_cb { + void (*dma_completion_func)(void *cookie); + void *cb_cookie; + u8 *temp_buf; + u8 *temp_buf_to_free; + bool is_cache; + s64 dst_offset; + struct scif_window *dst_window; + size_t len; + dma_addr_t temp_phys; + struct scif_dev *sdev; + int header_padding; +}; + +/** + * struct scif_copy_work - Work for DMA copy + * + * @src_offset: Starting source offset + * @dst_offset: Starting destination offset + * @src_window: Starting src registered window + * @dst_window: Starting dst registered window + * @loopback: true if this is a loopback DMA transfer + * @len: Length of the transfer + * @comp_cb: DMA copy completion callback + * @remote_dev: The remote SCIF peer device + * @fence_type: polling or interrupt based + * @ordered: is this a tail byte ordered DMA transfer + */ +struct scif_copy_work { + s64 src_offset; + s64 dst_offset; + struct scif_window *src_window; + struct scif_window *dst_window; + int loopback; + size_t len; + struct scif_dma_comp_cb *comp_cb; + struct scif_dev *remote_dev; + int fence_type; + bool ordered; +}; + +#ifndef list_entry_next +#define list_entry_next(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) +#endif + +/** + * scif_reserve_dma_chan: + * @ep: Endpoint Descriptor. + * + * This routine reserves a DMA channel for a particular + * endpoint. All DMA transfers for an endpoint are always + * programmed on the same DMA channel. + */ +int scif_reserve_dma_chan(struct scif_endpt *ep) +{ + int err = 0; + struct scif_dev *scifdev; + struct scif_hw_dev *sdev; + struct dma_chan *chan; + + /* Loopback DMAs are not supported on the management node */ + if (!scif_info.nodeid && scifdev_self(ep->remote_dev)) + return 0; + if (scif_info.nodeid) + scifdev = &scif_dev[0]; + else + scifdev = ep->remote_dev; + sdev = scifdev->sdev; + if (!sdev->num_dma_ch) + return -ENODEV; + chan = sdev->dma_ch[scifdev->dma_ch_idx]; + scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch; + mutex_lock(&ep->rma_info.rma_lock); + ep->rma_info.dma_chan = chan; + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +#ifdef CONFIG_MMU_NOTIFIER +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached windows + */ +static +void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, + struct scif_endpt *ep, + u64 start, u64 len) +{ + struct list_head *item, *tmp; + struct scif_window *window; + u64 start_va, end_va; + u64 end = start + len; + + if (end <= start) + return; + + list_for_each_safe(item, tmp, &mmn->tc_reg_list) { + window = list_entry(item, struct scif_window, list); + ep = (struct scif_endpt *)window->ep; + if (!len) + break; + start_va = window->va_for_temp; + end_va = start_va + (window->nr_pages << PAGE_SHIFT); + if (start < start_va && end <= start_va) + break; + if (start >= end_va) + continue; + __scif_rma_destroy_tcw_helper(window); + } +} + +static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len) +{ + struct scif_endpt *ep = mmn->ep; + + spin_lock(&ep->rma_info.tc_lock); + __scif_rma_destroy_tcw(mmn, ep, start, len); + spin_unlock(&ep->rma_info.tc_lock); +} + +static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + } +} + +static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + spin_lock(&ep->rma_info.tc_lock); + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX); + } + spin_unlock(&ep->rma_info.tc_lock); +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit) + return false; + if ((atomic_read(&ep->rma_info.tcw_total_pages) + + (cur_bytes >> PAGE_SHIFT)) > + scif_info.rma_tc_limit) { + dev_info(scif_info.mdev.this_device, + "%s %d total=%d, current=%zu reached max\n", + __func__, __LINE__, + atomic_read(&ep->rma_info.tcw_total_pages), + (1 + (cur_bytes >> PAGE_SHIFT))); + scif_rma_destroy_tcw_invalid(); + __scif_rma_destroy_tcw_ep(ep); + } + return true; +} + +static void scif_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + schedule_work(&scif_info.misc_work); +} + +static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, address, PAGE_SIZE); +} + +static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, start, end - start); +} + +static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + /* + * Nothing to do here, everything needed was done in + * invalidate_range_start. + */ +} + +static const struct mmu_notifier_ops scif_mmu_notifier_ops = { + .release = scif_mmu_notifier_release, + .clear_flush_young = NULL, + .invalidate_page = scif_mmu_notifier_invalidate_page, + .invalidate_range_start = scif_mmu_notifier_invalidate_range_start, + .invalidate_range_end = scif_mmu_notifier_invalidate_range_end}; + +static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + struct scif_mmu_notif *mmn = NULL; + struct list_head *item, *tmp; + + mutex_lock(&ep->rma_info.mmn_lock); + list_for_each_safe(item, tmp, &rma->mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm); + list_del(item); + kfree(mmn); + } + mutex_unlock(&ep->rma_info.mmn_lock); +} + +static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn, + struct mm_struct *mm, struct scif_endpt *ep) +{ + mmn->ep = ep; + mmn->mm = mm; + mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops; + INIT_LIST_HEAD(&mmn->list); + INIT_LIST_HEAD(&mmn->tc_reg_list); +} + +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma) +{ + struct scif_mmu_notif *mmn; + struct list_head *item; + + list_for_each(item, &rma->mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + if (mmn->mm == mm) + return mmn; + } + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + struct scif_mmu_notif *mmn + = kzalloc(sizeof(*mmn), GFP_KERNEL); + + if (!mmn) + return ERR_PTR(ENOMEM); + + scif_init_mmu_notifier(mmn, current->mm, ep); + if (mmu_notifier_register(&mmn->ep_mmu_notifier, + current->mm)) { + kfree(mmn); + return ERR_PTR(EBUSY); + } + list_add(&mmn->list, &ep->rma_info.mmn_list); + return mmn; +} + +/* + * Called from the misc thread to destroy temporary cached windows and + * unregister the MMU notifier for the SCIF endpoint. + */ +void scif_mmu_notif_handler(struct work_struct *work) +{ + struct list_head *pos, *tmpq; + struct scif_endpt *ep; +restart: + scif_rma_destroy_tcw_invalid(); + spin_lock(&scif_info.rmalock); + list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) { + ep = list_entry(pos, struct scif_endpt, mmu_list); + list_del(&ep->mmu_list); + spin_unlock(&scif_info.rmalock); + scif_rma_destroy_tcw_ep(ep); + scif_ep_unregister_mmu_notifier(ep); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static bool scif_is_set_reg_cache(int flags) +{ + return !!(flags & SCIF_RMA_USECACHE); +} +#else +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, + struct scif_endpt_rma_info *rma) +{ + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + return NULL; +} + +void scif_mmu_notif_handler(struct work_struct *work) +{ +} + +static bool scif_is_set_reg_cache(int flags) +{ + return false; +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + return false; +} +#endif + +/** + * scif_register_temp: + * @epd: End Point Descriptor. + * @addr: virtual address to/from which to copy + * @len: length of range to copy + * @out_offset: computed offset returned by reference. + * @out_window: allocated registered window returned by reference. + * + * Create a temporary registered window. The peer will not know about this + * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's. + */ +static int +scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot, + off_t *out_offset, struct scif_window **out_window) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err; + scif_pinned_pages_t pinned_pages; + size_t aligned_len; + + aligned_len = ALIGN(len, PAGE_SIZE); + + err = __scif_pin_pages((void *)(addr & PAGE_MASK), + aligned_len, &prot, 0, &pinned_pages); + if (err) + return err; + + pinned_pages->prot = prot; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, 0, 0, + aligned_len >> PAGE_SHIFT, + (s64 *)out_offset); + if (err) + goto error_unpin; + + /* Allocate and prepare self registration window */ + *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT, + *out_offset, true); + if (!*out_window) { + scif_free_window_offset(ep, NULL, *out_offset); + err = -ENOMEM; + goto error_unpin; + } + + (*out_window)->pinned_pages = pinned_pages; + (*out_window)->nr_pages = pinned_pages->nr_pages; + (*out_window)->prot = pinned_pages->prot; + + (*out_window)->va_for_temp = addr & PAGE_MASK; + err = scif_map_window(ep->remote_dev, *out_window); + if (err) { + /* Something went wrong! Rollback */ + scif_destroy_window(ep, *out_window); + *out_window = NULL; + } else { + *out_offset |= (addr - (*out_window)->va_for_temp); + } + return err; +error_unpin: + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_unpin_pages(pinned_pages); + return err; +} + +#define SCIF_DMA_TO (3 * HZ) + +/* + * scif_sync_dma - Program a DMA without an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * @sync_wait: Wait for DMA to complete? + * + * Return 0 on success and -errno on error. + */ +static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan, + bool sync_wait) +{ + int err = 0; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_FENCE; + dma_cookie_t cookie; + struct dma_device *ddev; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + if (!sync_wait) { + dma_async_issue_pending(chan); + } else { + if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) { + err = 0; + } else { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + } + } +release: + return err; +} + +static void scif_dma_callback(void *arg) +{ + struct completion *done = (struct completion *)arg; + + complete(done); +} + +#define SCIF_DMA_SYNC_WAIT true +#define SCIF_DMA_POLL BIT(0) +#define SCIF_DMA_INTR BIT(1) + +/* + * scif_async_dma - Program a DMA with an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * Return 0 on success and -errno on error. + */ +static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + int err = 0; + struct dma_device *ddev; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE; + DECLARE_COMPLETION_ONSTACK(done_wait); + dma_cookie_t cookie; + enum dma_status status; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + reinit_completion(&done_wait); + tx->callback = scif_dma_callback; + tx->callback_param = &done_wait; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + dma_async_issue_pending(chan); + + err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO); + if (!err) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + err = 0; + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (status != DMA_COMPLETE) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } +release: + return err; +} + +/* + * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular + * DMA channel via polling. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT); +} + +/* + * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular + * DMA channel via interrupt based blocking wait. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_async_dma(sdev, chan); +} + +/** + * scif_rma_destroy_windows: + * + * This routine destroys all windows queued for cleanup + */ +void scif_rma_destroy_windows(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma) { + window = list_entry(item, struct scif_window, + list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) + /* Remove window from global list */ + window->unreg_state = OP_COMPLETED; + else + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + if (window->unreg_state == OP_COMPLETED) { + if (window->type == SCIF_WINDOW_SELF) + scif_destroy_window(ep, window); + else + scif_destroy_remote_window(window); + atomic_dec(&ep->rma_info.tw_refcount); + } + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached registered windows + * which have been queued for cleanup. + */ +void scif_rma_destroy_tcw_invalid(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma_tc) { + window = list_entry(item, struct scif_window, list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + mutex_lock(&ep->rma_info.rma_lock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) { + atomic_sub(window->nr_pages, + &ep->rma_info.tcw_total_pages); + scif_destroy_window(ep, window); + atomic_dec(&ep->rma_info.tcw_refcount); + } else { + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + } + mutex_unlock(&ep->rma_info.rma_lock); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static inline +void *_get_local_va(off_t off, struct scif_window *window, size_t len) +{ + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + void *va = NULL; + + if (window->type == SCIF_WINDOW_SELF) { + struct page **pages = window->pinned_pages->pages; + + va = page_address(pages[page_nr]) + page_off; + } + return va; +} + +static inline +void *ioremap_remote(off_t off, struct scif_window *window, + size_t len, struct scif_dev *dev, + struct scif_window_iter *iter) +{ + dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter); + + /* + * If the DMA address is not card relative then we need the DMA + * addresses to be an offset into the bar. The aperture base was already + * added so subtract it here since scif_ioremap is going to add it again + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + dev->sdev->aper && !dev->sdev->card_rel_da) + phys = phys - dev->sdev->aper->pa; + return scif_ioremap(phys, len, dev); +} + +static inline void +iounmap_remote(void *virt, size_t size, struct scif_copy_work *work) +{ + scif_iounmap(virt, size, work->remote_dev); +} + +/* + * Takes care of ordering issue caused by + * 1. Hardware: Only in the case of cpu copy from mgmt node to card + * because of WC memory. + * 2. Software: If memcpy reorders copy instructions for optimization. + * This could happen at both mgmt node and card. + */ +static inline void +scif_ordered_memcpy_toio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_toio((void __iomem __force *)dst, src, --count); + /* Order the last byte with the previous stores */ + wmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_toio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_toio(dst, src, count); + else + memcpy_toio((void __iomem __force *)dst, src, count); +} + +static inline +void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_fromio(dst, (void __iomem __force *)src, --count); + /* Order the last byte with the previous loads */ + rmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_fromio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_fromio(dst, src, count); + else + memcpy_fromio(dst, (void __iomem __force *)src, count); +} + +#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0) + +/* + * scif_off_to_dma_addr: + * Obtain the dma_addr given the window and the offset. + * @window: Registered window. + * @off: Window offset. + * @nr_bytes: Return the number of contiguous bytes till next DMA addr index. + * @index: Return the index of the dma_addr array found. + * @start_off: start offset of index of the dma addr array found. + * The nr_bytes provides the callee an estimate of the maximum possible + * DMA xfer possible while the index/start_off provide faster lookups + * for the next iteration. + */ +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, struct scif_window_iter *iter) +{ + int i, page_nr; + s64 start, end; + off_t page_off; + + if (window->nr_pages == window->nr_contig_chunks) { + page_nr = (off - window->offset) >> PAGE_SHIFT; + page_off = off & ~PAGE_MASK; + + if (nr_bytes) + *nr_bytes = PAGE_SIZE - page_off; + return window->dma_addr[page_nr] | page_off; + } + if (iter) { + i = iter->index; + start = iter->offset; + } else { + i = 0; + start = window->offset; + } + for (; i < window->nr_contig_chunks; i++) { + end = start + (window->num_pages[i] << PAGE_SHIFT); + if (off >= start && off < end) { + if (iter) { + iter->index = i; + iter->offset = start; + } + if (nr_bytes) + *nr_bytes = end - off; + return (window->dma_addr[i] + (off - start)); + } + start += (window->num_pages[i] << PAGE_SHIFT); + } + dev_err(scif_info.mdev.this_device, + "%s %d BUG. Addr not found? window %p off 0x%llx\n", + __func__, __LINE__, window, off); + return SCIF_RMA_ERROR_CODE; +} + +/* + * Copy between rma window and temporary buffer + */ +static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window, + u8 *temp, size_t rem_len, bool to_temp) +{ + void *window_virt; + size_t loop_len; + int offset_in_page; + s64 end_offset; + + offset_in_page = offset & ~PAGE_MASK; + loop_len = PAGE_SIZE - offset_in_page; + + if (rem_len < loop_len) + loop_len = rem_len; + + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + while (rem_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + loop_len = min(PAGE_SIZE, rem_len); + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + } +} + +/** + * scif_rma_completion_cb: + * @data: RMA cookie + * + * RMA interrupt completion callback. + */ +static void scif_rma_completion_cb(void *data) +{ + struct scif_dma_comp_cb *comp_cb = data; + + /* Free DMA Completion CB. */ + if (comp_cb->dst_window) + scif_rma_local_cpu_copy(comp_cb->dst_offset, + comp_cb->dst_window, + comp_cb->temp_buf + + comp_cb->header_padding, + comp_cb->len, false); + scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev, + SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, + comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +} + +/* Copies between temporary buffer and offsets provided in work */ +static int +scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work, + u8 *temp, struct dma_chan *chan, + bool src_local) +{ + struct scif_dma_comp_cb *comp_cb = work->comp_cb; + dma_addr_t window_dma_addr, temp_dma_addr; + dma_addr_t temp_phys = comp_cb->temp_phys; + size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len; + int offset_in_ca, ret = 0; + s64 end_offset, offset; + struct scif_window *window; + void *window_virt_addr; + size_t tail_len; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + if (src_local) { + offset = work->dst_offset; + window = work->dst_window; + } else { + offset = work->src_offset; + window = work->src_window; + } + + offset_in_ca = offset & (L1_CACHE_BYTES - 1); + if (offset_in_ca) { + loop_len = L1_CACHE_BYTES - offset_in_ca; + loop_len = min(loop_len, remaining_len); + window_virt_addr = ioremap_remote(offset, window, + loop_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + loop_len, + work->ordered && + !(remaining_len - loop_len)); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + loop_len, work->ordered && + !(remaining_len - loop_len)); + iounmap_remote(window_virt_addr, loop_len, work); + + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + } + + offset_in_ca = offset & ~PAGE_MASK; + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + if (scif_is_mgmt_node()) + temp_dma_addr = temp_phys; + else + /* Fix if we ever enable IOMMU on the card */ + temp_dma_addr = (dma_addr_t)virt_to_phys(temp); + window_dma_addr = scif_off_to_dma_addr(window, offset, + &nr_contig_bytes, + NULL); + loop_len = min(nr_contig_bytes, remaining_len); + if (src_local) { + if (work->ordered && !tail_len && + !(remaining_len - loop_len) && + loop_len != L1_CACHE_BYTES) { + /* + * Break up the last chunk of the transfer into + * two steps. if there is no tail to guarantee + * DMA ordering. SCIF_DMA_POLLING inserts + * a status update descriptor in step 1 which + * acts as a double sided synchronization fence + * for the DMA engine to ensure that the last + * cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + offset += (loop_len - L1_CACHE_BYTES); + temp_dma_addr += (loop_len - L1_CACHE_BYTES); + window_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + } else { + tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr, + window_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + if (ret < 0) + goto err; + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + offset_in_ca = 0; + } + if (tail_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + window_virt_addr = ioremap_remote(offset, window, tail_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed + * to guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_intr(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + tail_len, work->ordered); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + tail_len, work->ordered); + iounmap_remote(window_virt_addr, tail_len, work); + } + tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT); + if (!tx) { + ret = -ENOMEM; + return ret; + } + tx->callback = &scif_rma_completion_cb; + tx->callback_param = comp_cb; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + return ret; + } + dma_async_issue_pending(chan); + return 0; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * _scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + s64 end_src_offset, end_dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps to ensure that the last byte in step 2 is + * updated last. + */ + /* Step 1) DMA: Body Length - 1 */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - 1, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + src_offset += (loop_len - 1); + dst_offset += (loop_len - 1); + src_dma_addr += (loop_len - 1); + dst_dma_addr += (loop_len - 1); + remaining_len -= (loop_len - 1); + loop_len = remaining_len; + + /* Step 2) DMA: 1 BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + int src_cache_off; + s64 end_src_offset, end_dst_offset; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + void *src_virt, *dst_virt; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + if (src_cache_off != 0) { + /* Head */ + loop_len = L1_CACHE_BYTES - src_cache_off; + loop_len = min(loop_len, remaining_len); + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !tail_len && + !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps. if there is no tail to gurantee DMA ordering. + * Passing SCIF_DMA_POLLING inserts a status update + * descriptor in step 1 which acts as a double sided + * synchronization fence for the DMA engine to ensure + * that the last cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + src_offset += (loop_len - L1_CACHE_BYTES); + dst_offset += (loop_len - L1_CACHE_BYTES); + src_dma_addr += (loop_len - L1_CACHE_BYTES); + dst_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + remaining_len = tail_len; + if (remaining_len) { + loop_len = remaining_len; + if (src_offset == end_src_offset) + src_window = list_entry_next(src_window, list); + if (dst_offset == end_dst_offset) + dst_window = list_entry_next(dst_window, list); + + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed to + * guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_poll(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + work->ordered); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, + loop_len, work->ordered); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_cpu_copy: + * + * Traverse all the windows and perform CPU copy. + */ +static int scif_rma_list_cpu_copy(struct scif_copy_work *work) +{ + void *src_virt, *dst_virt; + size_t loop_len, remaining_len; + int src_page_off, dst_page_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 end_src_offset, end_dst_offset; + int ret = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + while (remaining_len) { + src_page_off = src_offset & ~PAGE_MASK; + dst_page_off = dst_offset & ~PAGE_MASK; + loop_len = min(PAGE_SIZE - + max(src_page_off, dst_page_off), + remaining_len); + + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, + &src_win_iter); + if (!src_virt) { + ret = -ENOMEM; + goto error; + } + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, + &dst_win_iter); + if (!dst_virt) { + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + ret = -ENOMEM; + goto error; + } + + if (work->loopback) { + memcpy(dst_virt, src_virt, loop_len); + } else { + if (src_window->type == SCIF_WINDOW_SELF) + memcpy_toio((void __iomem __force *)dst_virt, + src_virt, loop_len); + else + memcpy_fromio(dst_virt, + (void __iomem __force *)src_virt, + loop_len); + } + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type == SCIF_WINDOW_PEER) + iounmap_remote(dst_virt, loop_len, work); + + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + if (remaining_len) { + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + scif_init_window_iter(src_window, + &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + scif_init_window_iter(dst_window, + &dst_win_iter); + } + } + } +error: + return ret; +} + +static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd, + struct scif_copy_work *work, + struct dma_chan *chan, off_t loffset) +{ + int src_cache_off, dst_cache_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + u8 *temp = NULL; + bool src_local = true, dst_local = false; + struct scif_dma_comp_cb *comp_cb; + dma_addr_t src_dma_addr, dst_dma_addr; + int err; + + if (is_dma_copy_aligned(chan->device, 1, 1, 1)) + return _scif_rma_list_dma_copy_aligned(work, chan); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); + + if (dst_cache_off == src_cache_off) + return scif_rma_list_dma_copy_aligned(work, chan); + + if (work->loopback) + return scif_rma_list_cpu_copy(work); + src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset); + src_local = work->src_window->type == SCIF_WINDOW_SELF; + dst_local = work->dst_window->type == SCIF_WINDOW_SELF; + + dst_local = dst_local; + /* Allocate dma_completion cb */ + comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL); + if (!comp_cb) + goto error; + + work->comp_cb = comp_cb; + comp_cb->cb_cookie = comp_cb; + comp_cb->dma_completion_func = &scif_rma_completion_cb; + + if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) { + comp_cb->is_cache = false; + /* Allocate padding bytes to align to a cache line */ + temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), + GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + /* kmalloc(..) does not guarantee cache line alignment */ + if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES)) + temp = PTR_ALIGN(temp, L1_CACHE_BYTES); + } else { + comp_cb->is_cache = true; + temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + } + + if (src_local) { + temp += dst_cache_off; + scif_rma_local_cpu_copy(work->src_offset, work->src_window, + temp, work->len, true); + } else { + comp_cb->dst_window = work->dst_window; + comp_cb->dst_offset = work->dst_offset; + work->src_offset = work->src_offset - src_cache_off; + comp_cb->len = work->len; + work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES); + comp_cb->header_padding = src_cache_off; + } + comp_cb->temp_buf = temp; + + err = scif_map_single(&comp_cb->temp_phys, temp, + work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (err) + goto free_temp_buf; + comp_cb->sdev = work->remote_dev; + if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0) + goto free_temp_buf; + if (!src_local) + work->fence_type = SCIF_DMA_INTR; + return 0; +free_temp_buf: + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +free_comp_cb: + kfree(comp_cb); +error: + return -ENOMEM; +} + +/** + * scif_rma_copy: + * @epd: end point descriptor. + * @loffset: offset in local registered address space to/from which to copy + * @addr: user virtual address to/from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to/from which to copy + * @flags: flags + * @dir: LOCAL->REMOTE or vice versa. + * @last_chunk: true if this is the last chunk of a larger transfer + * + * Validate parameters, check if src/dst registered ranges requested for copy + * are valid and initiate either CPU or DMA copy. + */ +static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr, + size_t len, off_t roffset, int flags, + enum scif_rma_dir dir, bool last_chunk) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req remote_req; + struct scif_rma_req req; + struct scif_window *local_window = NULL; + struct scif_window *remote_window = NULL; + struct scif_copy_work copy_work; + bool loopback; + int err = 0; + struct dma_chan *chan; + struct scif_mmu_notif *mmn = NULL; + bool cache = false; + struct device *spdev; + + err = scif_verify_epd(ep); + if (err) + return err; + + if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE | + SCIF_RMA_SYNC | SCIF_RMA_ORDERED))) + return -EINVAL; + + loopback = scifdev_self(ep->remote_dev) ? true : false; + copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ? + SCIF_DMA_POLL : 0; + copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk); + + /* Use CPU for Mgmt node <-> Mgmt node copies */ + if (loopback && scif_is_mgmt_node()) { + flags |= SCIF_RMA_USECPU; + copy_work.fence_type = 0x0; + } + + cache = scif_is_set_reg_cache(flags); + + remote_req.out_window = &remote_window; + remote_req.offset = roffset; + remote_req.nr_bytes = len; + /* + * If transfer is from local to remote then the remote window + * must be writeable and vice versa. + */ + remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ; + remote_req.type = SCIF_WINDOW_PARTIAL; + remote_req.head = &ep->rma_info.remote_reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + + if (addr && cache) { + mutex_lock(&ep->rma_info.mmn_lock); + mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info); + if (!mmn) + scif_add_mmu_notifier(current->mm, ep); + mutex_unlock(&ep->rma_info.mmn_lock); + if (IS_ERR(mmn)) { + scif_put_peer_dev(spdev); + return PTR_ERR(mmn); + } + cache = cache && !scif_rma_tc_can_cache(ep, len); + } + mutex_lock(&ep->rma_info.rma_lock); + if (addr) { + req.out_window = &local_window; + req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK), + PAGE_SIZE); + req.va_for_temp = addr & PAGE_MASK; + req.prot = (dir == SCIF_LOCAL_TO_REMOTE ? + VM_READ : VM_WRITE | VM_READ); + /* Does a valid local window exist? */ + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + req.head = &mmn->tc_reg_list; + err = scif_query_tcw(ep, &req); + spin_unlock(&ep->rma_info.tc_lock); + } + if (!mmn || err) { + err = scif_register_temp(epd, req.va_for_temp, + req.nr_bytes, req.prot, + &loffset, &local_window); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + if (!cache) + goto skip_cache; + atomic_inc(&ep->rma_info.tcw_refcount); + atomic_add_return(local_window->nr_pages, + &ep->rma_info.tcw_total_pages); + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + scif_insert_tcw(local_window, + &mmn->tc_reg_list); + spin_unlock(&ep->rma_info.tc_lock); + } + } +skip_cache: + loffset = local_window->offset + + (addr - local_window->va_for_temp); + } else { + req.out_window = &local_window; + req.offset = loffset; + /* + * If transfer is from local to remote then the self window + * must be readable and vice versa. + */ + req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE; + req.nr_bytes = len; + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.reg_list; + /* Does a valid local window exist? */ + err = scif_query_window(&req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + } + + /* Does a valid remote window exist? */ + err = scif_query_window(&remote_req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + + /* + * Prepare copy_work for submitting work to the DMA kernel thread + * or CPU copy routine. + */ + copy_work.len = len; + copy_work.loopback = loopback; + copy_work.remote_dev = ep->remote_dev; + if (dir == SCIF_LOCAL_TO_REMOTE) { + copy_work.src_offset = loffset; + copy_work.src_window = local_window; + copy_work.dst_offset = roffset; + copy_work.dst_window = remote_window; + } else { + copy_work.src_offset = roffset; + copy_work.src_window = remote_window; + copy_work.dst_offset = loffset; + copy_work.dst_window = local_window; + } + + if (flags & SCIF_RMA_USECPU) { + scif_rma_list_cpu_copy(©_work); + } else { + chan = ep->rma_info.dma_chan; + err = scif_rma_list_dma_copy_wrapper(epd, ©_work, + chan, loffset); + } + if (addr && !cache) + atomic_inc(&ep->rma_info.tw_refcount); + + mutex_unlock(&ep->rma_info.rma_lock); + + if (last_chunk) { + struct scif_dev *rdev = ep->remote_dev; + + if (copy_work.fence_type == SCIF_DMA_POLL) + err = scif_drain_dma_poll(rdev->sdev, + ep->rma_info.dma_chan); + else if (copy_work.fence_type == SCIF_DMA_INTR) + err = scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + } + + if (addr && !cache) + scif_queue_for_cleanup(local_window, &scif_info.rma); + scif_put_peer_dev(spdev); + return err; +error: + if (err) { + if (addr && local_window && !cache) + scif_destroy_window(ep, local_window); + dev_err(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", + __func__, __LINE__, err, len); + } + scif_put_peer_dev(spdev); + return err; +} + +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto readfrom_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +readfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_readfrom); + +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto writeto_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +writeto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_writeto); + +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto vreadfrom_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +vreadfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vreadfrom); + +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto vwriteto_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +vwriteto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vwriteto); diff --git a/kernel/drivers/misc/mic/scif/scif_epd.c b/kernel/drivers/misc/mic/scif/scif_epd.c new file mode 100644 index 000000000..00e5d6d66 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_epd.c @@ -0,0 +1,357 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include "scif_map.h" + +void scif_cleanup_ep_qp(struct scif_endpt *ep) +{ + struct scif_qp *qp = ep->qp_info.qp; + + if (qp->outbound_q.rb_base) { + scif_iounmap((void *)qp->outbound_q.rb_base, + qp->outbound_q.size, ep->remote_dev); + qp->outbound_q.rb_base = NULL; + } + if (qp->remote_qp) { + scif_iounmap((void *)qp->remote_qp, + sizeof(struct scif_qp), ep->remote_dev); + qp->remote_qp = NULL; + } + if (qp->local_qp) { + scif_unmap_single(qp->local_qp, ep->remote_dev, + sizeof(struct scif_qp)); + qp->local_qp = 0x0; + } + if (qp->local_buf) { + scif_unmap_single(qp->local_buf, ep->remote_dev, + SCIF_ENDPT_QP_SIZE); + qp->local_buf = 0; + } +} + +void scif_teardown_ep(void *endpt) +{ + struct scif_endpt *ep = endpt; + struct scif_qp *qp = ep->qp_info.qp; + + if (qp) { + spin_lock(&ep->lock); + scif_cleanup_ep_qp(ep); + spin_unlock(&ep->lock); + kfree(qp->inbound_q.rb_base); + kfree(qp); + } +} + +/* + * Enqueue the endpoint to the zombie list for cleanup. + * The endpoint should not be accessed once this API returns. + */ +void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) +{ + if (!eplock_held) + mutex_lock(&scif_info.eplock); + spin_lock(&ep->lock); + ep->state = SCIFEP_ZOMBIE; + spin_unlock(&ep->lock); + list_add_tail(&ep->list, &scif_info.zombie); + scif_info.nr_zombies++; + if (!eplock_held) + mutex_unlock(&scif_info.eplock); + schedule_work(&scif_info.misc_work); +} + +static struct scif_endpt *scif_find_listen_ep(u16 port) +{ + struct scif_endpt *ep = NULL; + struct list_head *pos, *tmpq; + + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.listen) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->port.port == port) { + mutex_unlock(&scif_info.eplock); + return ep; + } + } + mutex_unlock(&scif_info.eplock); + return NULL; +} + +void scif_cleanup_zombie_epd(void) +{ + struct list_head *pos, *tmpq; + struct scif_endpt *ep; + + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.zombie) { + ep = list_entry(pos, struct scif_endpt, list); + if (scif_rma_ep_can_uninit(ep)) { + list_del(pos); + scif_info.nr_zombies--; + put_iova_domain(&ep->rma_info.iovad); + kfree(ep); + } + } + mutex_unlock(&scif_info.eplock); +} + +/** + * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message + * @msg: Interrupt message + * + * This message is initiated by the remote node to request a connection + * to the local node. This function looks for an end point in the + * listen state on the requested port id. + * + * If it finds a listening port it places the connect request on the + * listening end points queue and wakes up any pending accept calls. + * + * If it does not find a listening end point it sends a connection + * reject message to the remote node. + */ +void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = NULL; + struct scif_conreq *conreq; + + conreq = kmalloc(sizeof(*conreq), GFP_KERNEL); + if (!conreq) + /* Lack of resources so reject the request. */ + goto conreq_sendrej; + + ep = scif_find_listen_ep(msg->dst.port); + if (!ep) + /* Send reject due to no listening ports */ + goto conreq_sendrej_free; + else + spin_lock(&ep->lock); + + if (ep->backlog <= ep->conreqcnt) { + /* Send reject due to too many pending requests */ + spin_unlock(&ep->lock); + goto conreq_sendrej_free; + } + + conreq->msg = *msg; + list_add_tail(&conreq->list, &ep->conlist); + ep->conreqcnt++; + wake_up_interruptible(&ep->conwq); + spin_unlock(&ep->lock); + return; + +conreq_sendrej_free: + kfree(conreq); +conreq_sendrej: + msg->uop = SCIF_CNCT_REJ; + scif_nodeqp_send(&scif_dev[msg->src.node], msg); +} + +/** + * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message + * @msg: Interrupt message + * + * An accept() on the remote node has occurred and sent this message + * to indicate success. Place the end point in the MAPPING state and + * save the remote nodes memory information. Then wake up the connect + * request so it can finish. + */ +void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTING == ep->state) { + ep->peer.node = msg->src.node; + ep->peer.port = msg->src.port; + ep->qp_info.gnt_pld = msg->payload[1]; + ep->remote_ep = msg->payload[2]; + ep->state = SCIFEP_MAPPING; + + wake_up(&ep->conwq); + } + spin_unlock(&ep->lock); +} + +/** + * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message + * @msg: Interrupt message + * + * The remote connection request has finished mapping the local memory. + * Place the connection in the connected state and wake up the pending + * accept() call. + */ +void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + mutex_lock(&scif_info.connlock); + spin_lock(&ep->lock); + /* New ep is now connected with all resources set. */ + ep->state = SCIFEP_CONNECTED; + list_add_tail(&ep->list, &scif_info.connected); + wake_up(&ep->conwq); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); +} + +/** + * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message + * @msg: Interrupt message + * + * The remote connection request failed to map the local memory it was sent. + * Place the end point in the CLOSING state to indicate it and wake up + * the pending accept(); + */ +void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + ep->state = SCIFEP_CLOSING; + wake_up(&ep->conwq); + spin_unlock(&ep->lock); +} + +/** + * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message + * @msg: Interrupt message + * + * The remote end has rejected the connection request. Set the end + * point back to the bound state and wake up the pending connect(). + */ +void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTING == ep->state) { + ep->state = SCIFEP_BOUND; + wake_up(&ep->conwq); + } + spin_unlock(&ep->lock); +} + +/** + * scif_discnct() - Respond to SCIF_DISCNCT interrupt message + * @msg: Interrupt message + * + * The remote node has indicated close() has been called on its end + * point. Remove the local end point from the connected list, set its + * state to disconnected and ensure accesses to the remote node are + * shutdown. + * + * When all accesses to the remote end have completed then send a + * DISCNT_ACK to indicate it can remove its resources and complete + * the close routine. + */ +void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, struct scif_endpt, list); + /* + * The local ep may have sent a disconnect and and been closed + * due to a message response time out. It may have been + * allocated again and formed a new connection so we want to + * check if the remote ep matches + */ + if (((u64)tmpep == msg->payload[1]) && + ((u64)tmpep->remote_ep == msg->payload[0])) { + list_del(pos); + ep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + /* + * If the terminated end is not found then this side started closing + * before the other side sent the disconnect. If so the ep will no + * longer be on the connected list. Regardless the other side + * needs to be acked to let it know close is complete. + */ + if (!ep) { + mutex_unlock(&scif_info.connlock); + goto discnct_ack; + } + + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &scif_info.disconnected); + + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); + +discnct_ack: + msg->uop = SCIF_DISCNT_ACK; + scif_nodeqp_send(&scif_dev[msg->src.node], msg); +} + +/** + * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message + * @msg: Interrupt message + * + * Remote side has indicated it has not more references to local resources + */ +void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + ep->state = SCIFEP_DISCONNECTED; + spin_unlock(&ep->lock); + complete(&ep->discon); +} + +/** + * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or receive interrupt handling is complete. + */ +void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTED == ep->state) + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); +} + +/** + * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or receive interrupt handling is complete. + */ +void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTED == ep->state) + wake_up_interruptible(&ep->sendwq); + spin_unlock(&ep->lock); +} diff --git a/kernel/drivers/misc/mic/scif/scif_epd.h b/kernel/drivers/misc/mic/scif/scif_epd.h new file mode 100644 index 000000000..1771d7a9b --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_epd.h @@ -0,0 +1,210 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_EPD_H +#define SCIF_EPD_H + +#include <linux/delay.h> +#include <linux/scif.h> +#include <linux/scif_ioctl.h> + +#define SCIF_EPLOCK_HELD true + +enum scif_epd_state { + SCIFEP_UNBOUND, + SCIFEP_BOUND, + SCIFEP_LISTENING, + SCIFEP_CONNECTED, + SCIFEP_CONNECTING, + SCIFEP_MAPPING, + SCIFEP_CLOSING, + SCIFEP_CLLISTEN, + SCIFEP_DISCONNECTED, + SCIFEP_ZOMBIE +}; + +/* + * struct scif_conreq - Data structure added to the connection list. + * + * @msg: connection request message received + * @list: link to list of connection requests + */ +struct scif_conreq { + struct scifmsg msg; + struct list_head list; +}; + +/* Size of the RB for the Endpoint QP */ +#define SCIF_ENDPT_QP_SIZE 0x1000 + +/* + * scif_endpt_qp_info - SCIF endpoint queue pair + * + * @qp - Qpair for this endpoint + * @qp_offset - DMA address of the QP + * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the + * physical address of the remote_qp. + */ +struct scif_endpt_qp_info { + struct scif_qp *qp; + dma_addr_t qp_offset; + dma_addr_t gnt_pld; +}; + +/* + * struct scif_endpt - The SCIF endpoint data structure + * + * @state: end point state + * @lock: lock synchronizing access to endpoint fields like state etc + * @port: self port information + * @peer: peer port information + * @backlog: maximum pending connection requests + * @qp_info: Endpoint QP information for SCIF messaging + * @remote_dev: scifdev used by this endpt to communicate with remote node. + * @remote_ep: remote endpoint + * @conreqcnt: Keep track of number of connection requests. + * @files: Open file information used to match the id passed in with + * the flush routine. + * @conlist: list of connection requests + * @conwq: waitqueue for connection processing + * @discon: completion used during disconnection + * @sendwq: waitqueue used during sending messages + * @recvwq: waitqueue used during message receipt + * @sendlock: Synchronize ordering of messages sent + * @recvlock: Synchronize ordering of messages received + * @list: link to list of various endpoints like connected, listening etc + * @li_accept: pending ACCEPTREG + * @acceptcnt: pending ACCEPTREG cnt + * @liacceptlist: link to listen accept + * @miacceptlist: link to uaccept + * @listenep: associated listen ep + * @conn_work: Non blocking connect work + * @conn_port: Connection port + * @conn_err: Errors during connection + * @conn_async_state: Async connection + * @conn_pend_wq: Used by poll while waiting for incoming connections + * @conn_list: List of async connection requests + * @rma_info: Information for triggering SCIF RMA and DMA operations + * @mmu_list: link to list of MMU notifier cleanup work + * @anon: anonymous file for use in kernel mode scif poll + */ +struct scif_endpt { + enum scif_epd_state state; + spinlock_t lock; + struct scif_port_id port; + struct scif_port_id peer; + int backlog; + struct scif_endpt_qp_info qp_info; + struct scif_dev *remote_dev; + u64 remote_ep; + int conreqcnt; + struct files_struct *files; + struct list_head conlist; + wait_queue_head_t conwq; + struct completion discon; + wait_queue_head_t sendwq; + wait_queue_head_t recvwq; + struct mutex sendlock; + struct mutex recvlock; + struct list_head list; + struct list_head li_accept; + int acceptcnt; + struct list_head liacceptlist; + struct list_head miacceptlist; + struct scif_endpt *listenep; + struct scif_port_id conn_port; + int conn_err; + int conn_async_state; + wait_queue_head_t conn_pend_wq; + struct list_head conn_list; + struct scif_endpt_rma_info rma_info; + struct list_head mmu_list; + struct file *anon; +}; + +static inline int scifdev_alive(struct scif_endpt *ep) +{ + return _scifdev_alive(ep->remote_dev); +} + +/* + * scif_verify_epd: + * ep: SCIF endpoint + * + * Checks several generic error conditions and returns the + * appropriate error. + */ +static inline int scif_verify_epd(struct scif_endpt *ep) +{ + if (ep->state == SCIFEP_DISCONNECTED) + return -ECONNRESET; + + if (ep->state != SCIFEP_CONNECTED) + return -ENOTCONN; + + if (!scifdev_alive(ep)) + return -ENODEV; + + return 0; +} + +static inline int scif_anon_inode_getfile(scif_epd_t epd) +{ + epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); + if (IS_ERR(epd->anon)) + return PTR_ERR(epd->anon); + return 0; +} + +static inline void scif_anon_inode_fput(scif_epd_t epd) +{ + if (epd->anon) { + fput(epd->anon); + epd->anon = NULL; + } +} + +void scif_cleanup_zombie_epd(void); +void scif_teardown_ep(void *endpt); +void scif_cleanup_ep_qp(struct scif_endpt *ep); +void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held); +void scif_get_node_info(void); +void scif_send_acks(struct scif_dev *dev); +void scif_conn_handler(struct work_struct *work); +int scif_rsrv_port(u16 port); +void scif_get_port(u16 port); +int scif_get_new_port(void); +void scif_put_port(u16 port); +int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags); +int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags); +void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); +int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); +int __scif_flush(scif_epd_t epd); +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); +unsigned int __scif_pollfd(struct file *f, poll_table *wait, + struct scif_endpt *ep); +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages); +#endif /* SCIF_EPD_H */ diff --git a/kernel/drivers/misc/mic/scif/scif_fd.c b/kernel/drivers/misc/mic/scif/scif_fd.c new file mode 100644 index 000000000..f7e826142 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_fd.c @@ -0,0 +1,471 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" + +static int scif_fdopen(struct inode *inode, struct file *f) +{ + struct scif_endpt *priv = scif_open(); + + if (!priv) + return -ENOMEM; + f->private_data = priv; + return 0; +} + +static int scif_fdclose(struct inode *inode, struct file *f) +{ + struct scif_endpt *priv = f->private_data; + + return scif_close(priv); +} + +static int scif_fdmmap(struct file *f, struct vm_area_struct *vma) +{ + struct scif_endpt *priv = f->private_data; + + return scif_mmap(vma, priv); +} + +static unsigned int scif_fdpoll(struct file *f, poll_table *wait) +{ + struct scif_endpt *priv = f->private_data; + + return __scif_pollfd(f, wait, priv); +} + +static int scif_fdflush(struct file *f, fl_owner_t id) +{ + struct scif_endpt *ep = f->private_data; + + spin_lock(&ep->lock); + /* + * The listening endpoint stashes the open file information before + * waiting for incoming connections. The release callback would never be + * called if the application closed the endpoint, while waiting for + * incoming connections from a separate thread since the file descriptor + * reference count is bumped up in the accept IOCTL. Call the flush + * routine if the id matches the endpoint open file information so that + * the listening endpoint can be woken up and the fd released. + */ + if (ep->files == id) + __scif_flush(ep); + spin_unlock(&ep->lock); + return 0; +} + +static __always_inline void scif_err_debug(int err, const char *str) +{ + /* + * ENOTCONN is a common uninteresting error which is + * flooding debug messages to the console unnecessarily. + */ + if (err < 0 && err != -ENOTCONN) + dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err); +} + +static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct scif_endpt *priv = f->private_data; + void __user *argp = (void __user *)arg; + int err = 0; + struct scifioctl_msg request; + bool non_block = false; + + non_block = !!(f->f_flags & O_NONBLOCK); + + switch (cmd) { + case SCIF_BIND: + { + int pn; + + if (copy_from_user(&pn, argp, sizeof(pn))) + return -EFAULT; + + pn = scif_bind(priv, pn); + if (pn < 0) + return pn; + + if (copy_to_user(argp, &pn, sizeof(pn))) + return -EFAULT; + + return 0; + } + case SCIF_LISTEN: + return scif_listen(priv, arg); + case SCIF_CONNECT: + { + struct scifioctl_connect req; + struct scif_endpt *ep = (struct scif_endpt *)priv; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + err = __scif_connect(priv, &req.peer, non_block); + if (err < 0) + return err; + + req.self.node = ep->port.node; + req.self.port = ep->port.port; + + if (copy_to_user(argp, &req, sizeof(req))) + return -EFAULT; + + return 0; + } + /* + * Accept is done in two halves. The request ioctl does the basic + * functionality of accepting the request and returning the information + * about it including the internal ID of the end point. The register + * is done with the internal ID on a new file descriptor opened by the + * requesting process. + */ + case SCIF_ACCEPTREQ: + { + struct scifioctl_accept request; + scif_epd_t *ep = (scif_epd_t *)&request.endpt; + + if (copy_from_user(&request, argp, sizeof(request))) + return -EFAULT; + + err = scif_accept(priv, &request.peer, ep, request.flags); + if (err < 0) + return err; + + if (copy_to_user(argp, &request, sizeof(request))) { + scif_close(*ep); + return -EFAULT; + } + /* + * Add to the list of user mode eps where the second half + * of the accept is not yet completed. + */ + mutex_lock(&scif_info.eplock); + list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); + list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); + (*ep)->listenep = priv; + priv->acceptcnt++; + mutex_unlock(&scif_info.eplock); + + return 0; + } + case SCIF_ACCEPTREG: + { + struct scif_endpt *priv = f->private_data; + struct scif_endpt *newep; + struct scif_endpt *lisep; + struct scif_endpt *fep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + + /* Finally replace the pointer to the accepted endpoint */ + if (copy_from_user(&newep, argp, sizeof(void *))) + return -EFAULT; + + /* Remove form the user accept queue */ + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.uaccept) { + tmpep = list_entry(pos, + struct scif_endpt, miacceptlist); + if (tmpep == newep) { + list_del(pos); + fep = tmpep; + break; + } + } + + if (!fep) { + mutex_unlock(&scif_info.eplock); + return -ENOENT; + } + + lisep = newep->listenep; + list_for_each_safe(pos, tmpq, &lisep->li_accept) { + tmpep = list_entry(pos, + struct scif_endpt, liacceptlist); + if (tmpep == newep) { + list_del(pos); + lisep->acceptcnt--; + break; + } + } + + mutex_unlock(&scif_info.eplock); + + /* Free the resources automatically created from the open. */ + scif_anon_inode_fput(priv); + scif_teardown_ep(priv); + scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD); + f->private_data = newep; + return 0; + } + case SCIF_SEND: + { + struct scif_endpt *priv = f->private_data; + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto send_err; + } + err = scif_user_send(priv, (void __user *)request.msg, + request.len, request.flags); + if (err < 0) + goto send_err; + if (copy_to_user(& + ((struct scifioctl_msg __user *)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto send_err; + } + err = 0; +send_err: + scif_err_debug(err, "scif_send"); + return err; + } + case SCIF_RECV: + { + struct scif_endpt *priv = f->private_data; + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto recv_err; + } + + err = scif_user_recv(priv, (void __user *)request.msg, + request.len, request.flags); + if (err < 0) + goto recv_err; + + if (copy_to_user(& + ((struct scifioctl_msg __user *)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto recv_err; + } + err = 0; +recv_err: + scif_err_debug(err, "scif_recv"); + return err; + } + case SCIF_GET_NODEIDS: + { + struct scifioctl_node_ids node_ids; + int entries; + u16 *nodes; + void __user *unodes, *uself; + u16 self; + + if (copy_from_user(&node_ids, argp, sizeof(node_ids))) { + err = -EFAULT; + goto getnodes_err2; + } + + entries = min_t(int, scif_info.maxid, node_ids.len); + nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL); + if (entries && !nodes) { + err = -ENOMEM; + goto getnodes_err2; + } + node_ids.len = scif_get_node_ids(nodes, entries, &self); + + unodes = (void __user *)node_ids.nodes; + if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) { + err = -EFAULT; + goto getnodes_err1; + } + + uself = (void __user *)node_ids.self; + if (copy_to_user(uself, &self, sizeof(u16))) { + err = -EFAULT; + goto getnodes_err1; + } + + if (copy_to_user(argp, &node_ids, sizeof(node_ids))) { + err = -EFAULT; + goto getnodes_err1; + } +getnodes_err1: + kfree(nodes); +getnodes_err2: + return err; + } + case SCIF_REG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_reg reg; + off_t ret; + + if (copy_from_user(®, argp, sizeof(reg))) { + err = -EFAULT; + goto reg_err; + } + if (reg.flags & SCIF_MAP_KERNEL) { + err = -EINVAL; + goto reg_err; + } + ret = scif_register(priv, (void *)reg.addr, reg.len, + reg.offset, reg.prot, reg.flags); + if (ret < 0) { + err = (int)ret; + goto reg_err; + } + + if (copy_to_user(&((struct scifioctl_reg __user *)argp) + ->out_offset, &ret, sizeof(reg.out_offset))) { + err = -EFAULT; + goto reg_err; + } + err = 0; +reg_err: + scif_err_debug(err, "scif_register"); + return err; + } + case SCIF_UNREG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_unreg unreg; + + if (copy_from_user(&unreg, argp, sizeof(unreg))) { + err = -EFAULT; + goto unreg_err; + } + err = scif_unregister(priv, unreg.offset, unreg.len); +unreg_err: + scif_err_debug(err, "scif_unregister"); + return err; + } + case SCIF_READFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto readfrom_err; + } + err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +readfrom_err: + scif_err_debug(err, "scif_readfrom"); + return err; + } + case SCIF_WRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto writeto_err; + } + err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +writeto_err: + scif_err_debug(err, "scif_writeto"); + return err; + } + case SCIF_VREADFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vreadfrom_err; + } + err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vreadfrom_err: + scif_err_debug(err, "scif_vreadfrom"); + return err; + } + case SCIF_VWRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vwriteto_err; + } + err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vwriteto_err: + scif_err_debug(err, "scif_vwriteto"); + return err; + } + case SCIF_FENCE_MARK: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_mark mark; + int tmp_mark = 0; + + if (copy_from_user(&mark, argp, sizeof(mark))) { + err = -EFAULT; + goto fence_mark_err; + } + err = scif_fence_mark(priv, mark.flags, &tmp_mark); + if (err) + goto fence_mark_err; + if (copy_to_user((void __user *)mark.mark, &tmp_mark, + sizeof(tmp_mark))) { + err = -EFAULT; + goto fence_mark_err; + } +fence_mark_err: + scif_err_debug(err, "scif_fence_mark"); + return err; + } + case SCIF_FENCE_WAIT: + { + struct scif_endpt *priv = f->private_data; + + err = scif_fence_wait(priv, arg); + scif_err_debug(err, "scif_fence_wait"); + return err; + } + case SCIF_FENCE_SIGNAL: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_signal signal; + + if (copy_from_user(&signal, argp, sizeof(signal))) { + err = -EFAULT; + goto fence_signal_err; + } + + err = scif_fence_signal(priv, signal.loff, signal.lval, + signal.roff, signal.rval, signal.flags); +fence_signal_err: + scif_err_debug(err, "scif_fence_signal"); + return err; + } + } + return -EINVAL; +} + +const struct file_operations scif_fops = { + .open = scif_fdopen, + .release = scif_fdclose, + .unlocked_ioctl = scif_fdioctl, + .mmap = scif_fdmmap, + .poll = scif_fdpoll, + .flush = scif_fdflush, + .owner = THIS_MODULE, +}; diff --git a/kernel/drivers/misc/mic/scif/scif_fence.c b/kernel/drivers/misc/mic/scif/scif_fence.c new file mode 100644 index 000000000..7f2c96f57 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_fence.c @@ -0,0 +1,771 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ + +#include "scif_main.h" + +/** + * scif_recv_mark: Handle SCIF_MARK request + * @msg: Interrupt message + * + * The peer has requested a mark. + */ +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int mark, err; + + err = _scif_fence_mark(ep, &mark); + if (err) + msg->uop = SCIF_MARK_NACK; + else + msg->uop = SCIF_MARK_ACK; + msg->payload[0] = ep->remote_ep; + msg->payload[2] = mark; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_MARK message. + */ +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_MARK_ACK) { + fence_req->state = OP_COMPLETED; + fence_req->dma_mark = (int)msg->payload[2]; + } else { + fence_req->state = OP_FAILED; + } + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_wait: Handle SCIF_WAIT request + * @msg: Interrupt message + * + * The peer has requested waiting on a fence. + */ +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_remote_fence_info *fence; + + /* + * Allocate structure for remote fence information and + * send a NACK if the allocation failed. The peer will + * return ENOMEM upon receiving a NACK. + */ + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) { + msg->payload[0] = ep->remote_ep; + msg->uop = SCIF_WAIT_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + return; + } + + /* Prepare the fence request */ + memcpy(&fence->msg, msg, sizeof(struct scifmsg)); + INIT_LIST_HEAD(&fence->list); + + /* Insert to the global remote fence request list */ + mutex_lock(&scif_info.fencelock); + atomic_inc(&ep->rma_info.fence_refcount); + list_add_tail(&fence->list, &scif_info.fence); + mutex_unlock(&scif_info.fencelock); + + schedule_work(&scif_info.misc_work); +} + +/** + * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_WAIT message. + */ +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_WAIT_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request + * @msg: Interrupt message + * + * The peer has requested a signal on a local offset. + */ +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_SELF); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request + * @msg: Interrupt message + * + * The peer has requested a signal on a remote offset. + */ +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_PEER); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a signal request. + */ +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[3]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_SIG_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +static inline void *scif_get_local_va(off_t off, struct scif_window *window) +{ + struct page **pages = window->pinned_pages->pages; + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + + return page_address(pages[page_nr]) + page_off; +} + +static void scif_prog_signal_cb(void *arg) +{ + struct scif_status *status = arg; + + dma_pool_free(status->ep->remote_dev->signal_pool, status, + status->src_dma_addr); +} + +static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1); + struct dma_async_tx_descriptor *tx; + struct scif_status *status = NULL; + dma_addr_t src; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + dma_async_issue_pending(chan); + if (x100) { + /* + * For X100 use the status descriptor to write the value to + * the destination. + */ + tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0); + } else { + status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL, + &src); + if (!status) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + status->val = val; + status->src_dma_addr = src; + status->ep = ep; + src += offsetof(struct scif_status, val); + tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val), + DMA_PREP_INTERRUPT); + } + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + if (!x100) { + tx->callback = scif_prog_signal_cb; + tx->callback_param = status; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = -EIO; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + dma_async_issue_pending(chan); + return 0; +dma_fail: + if (!x100) + dma_pool_free(ep->remote_dev->signal_pool, status, + status->src_dma_addr); +alloc_fail: + return err; +} + +/* + * scif_prog_signal: + * @epd - Endpoint Descriptor + * @offset - registered address to write @val to + * @val - Value to be written at @offset + * @type - Type of the window. + * + * Arrange to write a value to the registered offset after ensuring that the + * offset provided is indeed valid. + */ +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + dma_addr_t dst_dma_addr; + int err; + + mutex_lock(&ep->rma_info.rma_lock); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = sizeof(u64); + req.prot = SCIF_PROT_WRITE; + req.type = SCIF_WINDOW_SINGLE; + if (type == SCIF_WINDOW_SELF) + req.head = &ep->rma_info.reg_list; + else + req.head = &ep->rma_info.remote_reg_list; + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto unlock_ret; + } + + if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) { + u64 *dst_virt; + + if (type == SCIF_WINDOW_SELF) + dst_virt = scif_get_local_va(offset, window); + else + dst_virt = + scif_get_local_va(offset, (struct scif_window *) + window->peer_window); + *dst_virt = val; + } else { + dst_dma_addr = __scif_off_to_dma_addr(window, offset); + err = _scif_prog_signal(epd, dst_dma_addr, val); + } +unlock_ret: + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +static int _scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE; + int err; + + /* Wait for DMA callback in scif_fence_mark_cb(..) */ + err = wait_event_interruptible_timeout(ep->rma_info.markwq, + dma_async_is_tx_complete( + ep->rma_info.dma_chan, + cookie, NULL, NULL) == + DMA_COMPLETE, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err) + err = -ETIMEDOUT; + else if (err > 0) + err = 0; + return err; +} + +/** + * scif_rma_handle_remote_fences: + * + * This routine services remote fence requests. + */ +void scif_rma_handle_remote_fences(void) +{ + struct list_head *item, *tmp; + struct scif_remote_fence_info *fence; + struct scif_endpt *ep; + int mark, err; + + might_sleep(); + mutex_lock(&scif_info.fencelock); + list_for_each_safe(item, tmp, &scif_info.fence) { + fence = list_entry(item, struct scif_remote_fence_info, + list); + /* Remove fence from global list */ + list_del(&fence->list); + + /* Initiate the fence operation */ + ep = (struct scif_endpt *)fence->msg.payload[0]; + mark = fence->msg.payload[2]; + err = _scif_fence_wait(ep, mark); + if (err) + fence->msg.uop = SCIF_WAIT_NACK; + else + fence->msg.uop = SCIF_WAIT_ACK; + fence->msg.payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, &fence->msg); + kfree(fence); + if (!atomic_sub_return(1, &ep->rma_info.fence_refcount)) + schedule_work(&scif_info.misc_work); + } + mutex_unlock(&scif_info.fencelock); +} + +static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark) +{ + int err; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + + msg.src = ep->port; + msg.uop = uop; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (u64)fence_req; + if (uop == SCIF_WAIT) + msg.payload[2] = mark; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; +retry: + /* Wait for a SCIF_WAIT_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + mutex_lock(&ep->rma_info.rma_lock); + if (err < 0) { + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + } + if (fence_req->state == OP_FAILED && !err) + err = -ENOMEM; + if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED) + *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark; + mutex_unlock(&ep->rma_info.rma_lock); +error_free: + kfree(fence_req); +error: + return err; +} + +/** + * scif_send_fence_mark: + * @epd: end point descriptor. + * @out_mark: Output DMA mark reported by peer. + * + * Send a remote fence mark request. + */ +static int scif_send_fence_mark(scif_epd_t epd, int *out_mark) +{ + return _scif_send_fence(epd, SCIF_MARK, 0, out_mark); +} + +/** + * scif_send_fence_wait: + * @epd: end point descriptor. + * @mark: DMA mark to wait for. + * + * Send a remote fence wait request. + */ +static int scif_send_fence_wait(scif_epd_t epd, int mark) +{ + return _scif_send_fence(epd, SCIF_WAIT, mark, NULL); +} + +static int _scif_send_fence_signal_wait(struct scif_endpt *ep, + struct scif_fence_info *fence_req) +{ + int err; + +retry: + /* Wait for a SCIF_SIG_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (fence_req->state == OP_FAILED && !err) + err = -ENXIO; + return err; +} + +/** + * scif_send_fence_signal: + * @epd - endpoint descriptor + * @loff - local offset + * @lval - local value to write to loffset + * @roff - remote offset + * @rval - remote value to write to roffset + * @flags - flags + * + * Sends a remote fence signal request + */ +static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval, + off_t loff, u64 lval, int flags) +{ + int err = 0; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + msg.src = ep->port; + if (flags & SCIF_SIGNAL_LOCAL) { + msg.uop = SCIF_SIG_LOCAL; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = roff; + msg.payload[2] = rval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + if (err) + goto error_free; + } + fence_req->state = OP_IN_PROGRESS; + + if (flags & SCIF_SIGNAL_REMOTE) { + msg.uop = SCIF_SIG_REMOTE; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = loff; + msg.payload[2] = lval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + } +error_free: + kfree(fence_req); +error: + return err; +} + +static void scif_fence_mark_cb(void *arg) +{ + struct scif_endpt *ep = (struct scif_endpt *)arg; + + wake_up_interruptible(&ep->rma_info.markwq); + atomic_dec(&ep->rma_info.fence_refcount); +} + +/* + * _scif_fence_mark: + * + * @epd - endpoint descriptor + * Set up a mark for this endpoint and return the value of the mark. + */ +int _scif_fence_mark(scif_epd_t epd, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + dma_async_issue_pending(chan); + tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + tx->callback = scif_fence_mark_cb; + tx->callback_param = ep; + *mark = cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + atomic_inc(&ep->rma_info.fence_refcount); + dma_async_issue_pending(chan); + return 0; +} + +#define SCIF_LOOPB_MAGIC_MARK 0xdead + +int scif_fence_mark(scif_epd_t epd, int flags, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n", + ep, flags, *mark); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* + * Management node loopback does not need to use DMA. + * Return a valid mark to be symmetric. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + *mark = SCIF_LOOPB_MAGIC_MARK; + return 0; + } + + if (flags & SCIF_FENCE_INIT_SELF) + err = _scif_fence_mark(epd, mark); + else + err = scif_send_fence_mark(ep, mark); + + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n", + ep, flags, *mark, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_mark); + +int scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_wait: ep %p mark 0x%x\n", + ep, mark); + err = scif_verify_epd(ep); + if (err) + return err; + /* + * Management node loopback does not need to use DMA. + * The only valid mark provided is 0 so simply + * return success if the mark is valid. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + if (mark == SCIF_LOOPB_MAGIC_MARK) + return 0; + else + return -EINVAL; + } + if (mark & SCIF_REMOTE_FENCE) + err = scif_send_fence_wait(epd, mark); + else + err = _scif_fence_wait(epd, mark); + if (err < 0) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_wait); + +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, + off_t roff, u64 rval, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n", + ep, loff, lval, roff, rval, flags); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER | + SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */ + if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))) + return -EINVAL; + + /* Only Dword offsets allowed */ + if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1))) + return -EINVAL; + + /* Only Dword aligned offsets allowed */ + if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1))) + return -EINVAL; + + if (flags & SCIF_FENCE_INIT_PEER) { + err = scif_send_fence_signal(epd, roff, rval, loff, + lval, flags); + } else { + /* Local Signal in Local RAS */ + if (flags & SCIF_SIGNAL_LOCAL) { + err = scif_prog_signal(epd, loff, lval, + SCIF_WINDOW_SELF); + if (err) + goto error_ret; + } + + /* Signal in Remote RAS */ + if (flags & SCIF_SIGNAL_REMOTE) + err = scif_prog_signal(epd, roff, + rval, SCIF_WINDOW_PEER); + } +error_ret: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_signal); diff --git a/kernel/drivers/misc/mic/scif/scif_main.c b/kernel/drivers/misc/mic/scif/scif_main.c new file mode 100644 index 000000000..36d847af1 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_main.c @@ -0,0 +1,359 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/module.h> +#include <linux/idr.h> + +#include <linux/mic_common.h> +#include "../common/mic_dev.h" +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" +#include "scif_main.h" +#include "scif_map.h" + +struct scif_info scif_info = { + .mdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "scif", + .fops = &scif_fops, + } +}; + +struct scif_dev *scif_dev; +struct kmem_cache *unaligned_cache; +static atomic_t g_loopb_cnt; + +/* Runs in the context of intr_wq */ +static void scif_intr_bh_handler(struct work_struct *work) +{ + struct scif_dev *scifdev = + container_of(work, struct scif_dev, intr_bh); + + if (scifdev_self(scifdev)) + scif_loopb_msg_handler(scifdev, scifdev->qpairs); + else + scif_nodeqp_intrhandler(scifdev, scifdev->qpairs); +} + +int scif_setup_intr_wq(struct scif_dev *scifdev) +{ + if (!scifdev->intr_wq) { + snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname), + "SCIF INTR %d", scifdev->node); + scifdev->intr_wq = + alloc_ordered_workqueue(scifdev->intr_wqname, 0); + if (!scifdev->intr_wq) + return -ENOMEM; + INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler); + } + return 0; +} + +void scif_destroy_intr_wq(struct scif_dev *scifdev) +{ + if (scifdev->intr_wq) { + destroy_workqueue(scifdev->intr_wq); + scifdev->intr_wq = NULL; + } +} + +irqreturn_t scif_intr_handler(int irq, void *data) +{ + struct scif_dev *scifdev = data; + struct scif_hw_dev *sdev = scifdev->sdev; + + sdev->hw_ops->ack_interrupt(sdev, scifdev->db); + queue_work(scifdev->intr_wq, &scifdev->intr_bh); + return IRQ_HANDLED; +} + +static void scif_qp_setup_handler(struct work_struct *work) +{ + struct scif_dev *scifdev = container_of(work, struct scif_dev, + qp_dwork.work); + struct scif_hw_dev *sdev = scifdev->sdev; + dma_addr_t da = 0; + int err; + + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + da = bp->scif_card_dma_addr; + scifdev->rdb = bp->h2c_scif_db; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + da = readq(&bp->scif_host_dma_addr); + scifdev->rdb = ioread8(&bp->c2h_scif_db); + } + if (da) { + err = scif_qp_response(da, scifdev); + if (err) + dev_err(&scifdev->sdev->dev, + "scif_qp_response err %d\n", err); + } else { + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); + } +} + +static int scif_setup_scifdev(void) +{ + /* We support a maximum of 129 SCIF nodes including the mgmt node */ +#define MAX_SCIF_NODES 129 + int i; + u8 num_nodes = MAX_SCIF_NODES; + + scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL); + if (!scif_dev) + return -ENOMEM; + for (i = 0; i < num_nodes; i++) { + struct scif_dev *scifdev = &scif_dev[i]; + + scifdev->node = i; + scifdev->exit = OP_IDLE; + init_waitqueue_head(&scifdev->disconn_wq); + mutex_init(&scifdev->lock); + INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device); + INIT_DELAYED_WORK(&scifdev->p2p_dwork, + scif_poll_qp_state); + INIT_DELAYED_WORK(&scifdev->qp_dwork, + scif_qp_setup_handler); + INIT_LIST_HEAD(&scifdev->p2p); + RCU_INIT_POINTER(scifdev->spdev, NULL); + } + return 0; +} + +static void scif_destroy_scifdev(void) +{ + kfree(scif_dev); +} + +static int scif_probe(struct scif_hw_dev *sdev) +{ + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + int rc; + + dev_set_drvdata(&sdev->dev, sdev); + scifdev->sdev = sdev; + + if (1 == atomic_add_return(1, &g_loopb_cnt)) { + struct scif_dev *loopb_dev = &scif_dev[sdev->snode]; + + loopb_dev->sdev = sdev; + rc = scif_setup_loopback_qp(loopb_dev); + if (rc) + goto exit; + } + + rc = scif_setup_intr_wq(scifdev); + if (rc) + goto destroy_loopb; + rc = scif_setup_qp(scifdev); + if (rc) + goto destroy_intr; + scifdev->db = sdev->hw_ops->next_db(sdev); + scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler, + "SCIF_INTR", scifdev, + scifdev->db); + if (IS_ERR(scifdev->cookie)) { + rc = PTR_ERR(scifdev->cookie); + goto free_qp; + } + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + bp->c2h_scif_db = scifdev->db; + bp->scif_host_dma_addr = scifdev->qp_dma_addr; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + iowrite8(scifdev->db, &bp->h2c_scif_db); + writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr); + } + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); + return rc; +free_qp: + scif_free_qp(scifdev); +destroy_intr: + scif_destroy_intr_wq(scifdev); +destroy_loopb: + if (atomic_dec_and_test(&g_loopb_cnt)) + scif_destroy_loopback_qp(&scif_dev[sdev->snode]); +exit: + return rc; +} + +void scif_stop(struct scif_dev *scifdev) +{ + struct scif_dev *dev; + int i; + + for (i = scif_info.maxid; i >= 0; i--) { + dev = &scif_dev[i]; + if (scifdev_self(dev)) + continue; + scif_handle_remove_node(i); + } +} + +static void scif_remove(struct scif_hw_dev *sdev) +{ + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + bp->c2h_scif_db = -1; + bp->scif_host_dma_addr = 0x0; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + iowrite8(-1, &bp->h2c_scif_db); + writeq(0x0, &bp->scif_card_dma_addr); + } + if (scif_is_mgmt_node()) { + scif_disconnect_node(scifdev->node, true); + } else { + scif_info.card_initiated_exit = true; + scif_stop(scifdev); + } + if (atomic_dec_and_test(&g_loopb_cnt)) + scif_destroy_loopback_qp(&scif_dev[sdev->snode]); + if (scifdev->cookie) { + sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev); + scifdev->cookie = NULL; + } + scif_destroy_intr_wq(scifdev); + cancel_delayed_work(&scifdev->qp_dwork); + scif_free_qp(scifdev); + scifdev->rdb = -1; + scifdev->sdev = NULL; +} + +static struct scif_hw_dev_id id_table[] = { + { MIC_SCIF_DEV, SCIF_DEV_ANY_ID }, + { 0 }, +}; + +static struct scif_driver scif_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = scif_probe, + .remove = scif_remove, +}; + +static int _scif_init(void) +{ + int rc; + + mutex_init(&scif_info.eplock); + spin_lock_init(&scif_info.rmalock); + spin_lock_init(&scif_info.nb_connect_lock); + spin_lock_init(&scif_info.port_lock); + mutex_init(&scif_info.conflock); + mutex_init(&scif_info.connlock); + mutex_init(&scif_info.fencelock); + INIT_LIST_HEAD(&scif_info.uaccept); + INIT_LIST_HEAD(&scif_info.listen); + INIT_LIST_HEAD(&scif_info.zombie); + INIT_LIST_HEAD(&scif_info.connected); + INIT_LIST_HEAD(&scif_info.disconnected); + INIT_LIST_HEAD(&scif_info.rma); + INIT_LIST_HEAD(&scif_info.rma_tc); + INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup); + INIT_LIST_HEAD(&scif_info.fence); + INIT_LIST_HEAD(&scif_info.nb_connect_list); + init_waitqueue_head(&scif_info.exitwq); + scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; + scif_info.en_msg_log = 0; + scif_info.p2p_enable = 1; + rc = scif_setup_scifdev(); + if (rc) + goto error; + unaligned_cache = kmem_cache_create("Unaligned_DMA", + SCIF_KMEM_UNALIGNED_BUF_SIZE, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!unaligned_cache) { + rc = -ENOMEM; + goto free_sdev; + } + INIT_WORK(&scif_info.misc_work, scif_misc_handler); + INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler); + INIT_WORK(&scif_info.conn_work, scif_conn_handler); + idr_init(&scif_ports); + return 0; +free_sdev: + scif_destroy_scifdev(); +error: + return rc; +} + +static void _scif_exit(void) +{ + idr_destroy(&scif_ports); + kmem_cache_destroy(unaligned_cache); + scif_destroy_scifdev(); +} + +static int __init scif_init(void) +{ + struct miscdevice *mdev = &scif_info.mdev; + int rc; + + _scif_init(); + iova_cache_get(); + rc = scif_peer_bus_init(); + if (rc) + goto exit; + rc = scif_register_driver(&scif_driver); + if (rc) + goto peer_bus_exit; + rc = misc_register(mdev); + if (rc) + goto unreg_scif; + scif_init_debugfs(); + return 0; +unreg_scif: + scif_unregister_driver(&scif_driver); +peer_bus_exit: + scif_peer_bus_exit(); +exit: + _scif_exit(); + return rc; +} + +static void __exit scif_exit(void) +{ + scif_exit_debugfs(); + misc_deregister(&scif_info.mdev); + scif_unregister_driver(&scif_driver); + scif_peer_bus_exit(); + iova_cache_put(); + _scif_exit(); +} + +module_init(scif_init); +module_exit(scif_exit); + +MODULE_DEVICE_TABLE(scif, id_table); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) SCIF driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/mic/scif/scif_main.h b/kernel/drivers/misc/mic/scif/scif_main.h new file mode 100644 index 000000000..a08f0b600 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_main.h @@ -0,0 +1,283 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_MAIN_H +#define SCIF_MAIN_H + +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/miscdevice.h> +#include <linux/dmaengine.h> +#include <linux/iova.h> +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/vmalloc.h> +#include <linux/scif.h> +#include "../common/mic_dev.h" + +#define SCIF_MGMT_NODE 0 +#define SCIF_DEFAULT_WATCHDOG_TO 30 +#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) +#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) +#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 + +/* + * Generic state used for certain node QP message exchanges + * like Unregister, Alloc etc. + */ +enum scif_msg_state { + OP_IDLE = 1, + OP_IN_PROGRESS, + OP_COMPLETED, + OP_FAILED +}; + +/* + * struct scif_info - Global SCIF information + * + * @nodeid: Node ID this node is to others + * @maxid: Max known node ID + * @total: Total number of SCIF nodes + * @nr_zombies: number of zombie endpoints + * @eplock: Lock to synchronize listening, zombie endpoint lists + * @connlock: Lock to synchronize connected and disconnected lists + * @nb_connect_lock: Synchronize non blocking connect operations + * @port_lock: Synchronize access to SCIF ports + * @uaccept: List of user acceptreq waiting for acceptreg + * @listen: List of listening end points + * @zombie: List of zombie end points with pending RMA's + * @connected: List of end points in connected state + * @disconnected: List of end points in disconnected state + * @nb_connect_list: List for non blocking connections + * @misc_work: miscellaneous SCIF tasks + * @conflock: Lock to synchronize SCIF node configuration changes + * @en_msg_log: Enable debug message logging + * @p2p_enable: Enable P2P SCIF network + * @mdev: The MISC device + * @conn_work: Work for workqueue handling all connections + * @exitwq: Wait queue for waiting for an EXIT node QP message response + * @loopb_dev: Dummy SCIF device used for loopback + * @loopb_wq: Workqueue used for handling loopback messages + * @loopb_wqname[16]: Name of loopback workqueue + * @loopb_work: Used for submitting work to loopb_wq + * @loopb_recv_q: List of messages received on the loopb_wq + * @card_initiated_exit: set when the card has initiated the exit + * @rmalock: Synchronize access to RMA operations + * @fencelock: Synchronize access to list of remote fences requested. + * @rma: List of temporary registered windows to be destroyed. + * @rma_tc: List of temporary registered & cached Windows to be destroyed + * @fence: List of remote fence requests + * @mmu_notif_work: Work for registration caching MMU notifier workqueue + * @mmu_notif_cleanup: List of temporary cached windows for reg cache + * @rma_tc_limit: RMA temporary cache limit + */ +struct scif_info { + u8 nodeid; + u8 maxid; + u8 total; + u32 nr_zombies; + struct mutex eplock; + struct mutex connlock; + spinlock_t nb_connect_lock; + spinlock_t port_lock; + struct list_head uaccept; + struct list_head listen; + struct list_head zombie; + struct list_head connected; + struct list_head disconnected; + struct list_head nb_connect_list; + struct work_struct misc_work; + struct mutex conflock; + u8 en_msg_log; + u8 p2p_enable; + struct miscdevice mdev; + struct work_struct conn_work; + wait_queue_head_t exitwq; + struct scif_dev *loopb_dev; + struct workqueue_struct *loopb_wq; + char loopb_wqname[16]; + struct work_struct loopb_work; + struct list_head loopb_recv_q; + bool card_initiated_exit; + spinlock_t rmalock; + struct mutex fencelock; + struct list_head rma; + struct list_head rma_tc; + struct list_head fence; + struct work_struct mmu_notif_work; + struct list_head mmu_notif_cleanup; + unsigned long rma_tc_limit; +}; + +/* + * struct scif_p2p_info - SCIF mapping information used for P2P + * + * @ppi_peer_id - SCIF peer node id + * @ppi_sg - Scatter list for bar information (One for mmio and one for aper) + * @sg_nentries - Number of entries in the scatterlist + * @ppi_da: DMA address for MMIO and APER bars + * @ppi_len: Length of MMIO and APER bars + * @ppi_list: Link in list of mapping information + */ +struct scif_p2p_info { + u8 ppi_peer_id; + struct scatterlist *ppi_sg[2]; + u64 sg_nentries[2]; + dma_addr_t ppi_da[2]; + u64 ppi_len[2]; +#define SCIF_PPI_MMIO 0 +#define SCIF_PPI_APER 1 + struct list_head ppi_list; +}; + +/* + * struct scif_dev - SCIF remote device specific fields + * + * @node: Node id + * @p2p: List of P2P mapping information + * @qpairs: The node queue pair for exchanging control messages + * @intr_wq: Workqueue for handling Node QP messages + * @intr_wqname: Name of node QP workqueue for handling interrupts + * @intr_bh: Used for submitting work to intr_wq + * @lock: Lock used for synchronizing access to the scif device + * @sdev: SCIF hardware device on the SCIF hardware bus + * @db: doorbell the peer will trigger to generate an interrupt on self + * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer + * @cookie: Cookie received while registering the interrupt handler + * @peer_add_work: Work for handling device_add for peer devices + * @p2p_dwork: Delayed work to enable polling for P2P state + * @qp_dwork: Delayed work for enabling polling for remote QP information + * @p2p_retry: Number of times to retry polling of P2P state + * @base_addr: P2P aperture bar base address + * @mic_mw mmio: The peer MMIO information used for P2P + * @spdev: SCIF peer device on the SCIF peer bus + * @node_remove_ack_pending: True if a node_remove_ack is pending + * @exit_ack_pending: true if an exit_ack is pending + * @disconn_wq: Used while waiting for a node remove response + * @disconn_rescnt: Keeps track of number of node remove requests sent + * @exit: Status of exit message + * @qp_dma_addr: Queue pair DMA address passed to the peer + * @dma_ch_idx: Round robin index for DMA channels + * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's +*/ +struct scif_dev { + u8 node; + struct list_head p2p; + struct scif_qp *qpairs; + struct workqueue_struct *intr_wq; + char intr_wqname[16]; + struct work_struct intr_bh; + struct mutex lock; + struct scif_hw_dev *sdev; + int db; + int rdb; + struct mic_irq *cookie; + struct work_struct peer_add_work; + struct delayed_work p2p_dwork; + struct delayed_work qp_dwork; + int p2p_retry; + dma_addr_t base_addr; + struct mic_mw mmio; + struct scif_peer_dev __rcu *spdev; + bool node_remove_ack_pending; + bool exit_ack_pending; + wait_queue_head_t disconn_wq; + atomic_t disconn_rescnt; + enum scif_msg_state exit; + dma_addr_t qp_dma_addr; + int dma_ch_idx; + struct dma_pool *signal_pool; +}; + +extern bool scif_reg_cache_enable; +extern bool scif_ulimit_check; +extern struct scif_info scif_info; +extern struct idr scif_ports; +extern struct bus_type scif_peer_bus; +extern struct scif_dev *scif_dev; +extern const struct file_operations scif_fops; +extern const struct file_operations scif_anon_fops; + +/* Size of the RB for the Node QP */ +#define SCIF_NODE_QP_SIZE 0x10000 + +#include "scif_nodeqp.h" +#include "scif_rma.h" +#include "scif_rma_list.h" + +/* + * scifdev_self: + * @dev: The remote SCIF Device + * + * Returns true if the SCIF Device passed is the self aka Loopback SCIF device. + */ +static inline int scifdev_self(struct scif_dev *dev) +{ + return dev->node == scif_info.nodeid; +} + +static inline bool scif_is_mgmt_node(void) +{ + return !scif_info.nodeid; +} + +/* + * scifdev_is_p2p: + * @dev: The remote SCIF Device + * + * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device. + */ +static inline bool scifdev_is_p2p(struct scif_dev *dev) +{ + if (scif_is_mgmt_node()) + return false; + else + return dev != &scif_dev[SCIF_MGMT_NODE] && + !scifdev_self(dev); +} + +/* + * scifdev_alive: + * @scifdev: The remote SCIF Device + * + * Returns true if the remote SCIF Device is running or sleeping for + * this endpoint. + */ +static inline int _scifdev_alive(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + + rcu_read_lock(); + spdev = rcu_dereference(scifdev->spdev); + rcu_read_unlock(); + return !!spdev; +} + +#include "scif_epd.h" + +void __init scif_init_debugfs(void); +void scif_exit_debugfs(void); +int scif_setup_intr_wq(struct scif_dev *scifdev); +void scif_destroy_intr_wq(struct scif_dev *scifdev); +void scif_cleanup_scifdev(struct scif_dev *dev); +void scif_handle_remove_node(int node); +void scif_disconnect_node(u32 node_id, bool mgmt_initiated); +void scif_free_qp(struct scif_dev *dev); +void scif_misc_handler(struct work_struct *work); +void scif_stop(struct scif_dev *scifdev); +irqreturn_t scif_intr_handler(int irq, void *data); +#endif /* SCIF_MAIN_H */ diff --git a/kernel/drivers/misc/mic/scif/scif_map.h b/kernel/drivers/misc/mic/scif/scif_map.h new file mode 100644 index 000000000..3e86360ba --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_map.h @@ -0,0 +1,136 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_MAP_H +#define SCIF_MAP_H + +#include "../bus/scif_bus.h" + +static __always_inline void * +scif_alloc_coherent(dma_addr_t *dma_handle, + struct scif_dev *scifdev, size_t size, + gfp_t gfp) +{ + void *va; + + if (scifdev_self(scifdev)) { + va = kmalloc(size, gfp); + if (va) + *dma_handle = virt_to_phys(va); + } else { + va = dma_alloc_coherent(&scifdev->sdev->dev, + size, dma_handle, gfp); + if (va && scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + return va; +} + +static __always_inline void +scif_free_coherent(void *va, dma_addr_t local, + struct scif_dev *scifdev, size_t size) +{ + if (scifdev_self(scifdev)) { + kfree(va); + } else { + if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) + local = local - scifdev->base_addr; + dma_free_coherent(&scifdev->sdev->dev, + size, va, local); + } +} + +static __always_inline int +scif_map_single(dma_addr_t *dma_handle, + void *local, struct scif_dev *scifdev, size_t size) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = virt_to_phys((local)); + } else { + *dma_handle = dma_map_single(&scifdev->sdev->dev, + local, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} + +static __always_inline void +scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev, + size_t size) +{ + if (!scifdev_self(scifdev)) { + if (scifdev_is_p2p(scifdev)) + local = local - scifdev->base_addr; + dma_unmap_single(&scifdev->sdev->dev, local, + size, DMA_BIDIRECTIONAL); + } +} + +static __always_inline void * +scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev) +{ + void *out_virt; + struct scif_hw_dev *sdev = scifdev->sdev; + + if (scifdev_self(scifdev)) + out_virt = phys_to_virt(phys); + else + out_virt = (void __force *) + sdev->hw_ops->ioremap(sdev, phys, size); + return out_virt; +} + +static __always_inline void +scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev) +{ + if (!scifdev_self(scifdev)) { + struct scif_hw_dev *sdev = scifdev->sdev; + + sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); + } +} + +static __always_inline int +scif_map_page(dma_addr_t *dma_handle, struct page *page, + struct scif_dev *scifdev) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = page_to_phys(page); + } else { + struct scif_hw_dev *sdev = scifdev->sdev; + *dma_handle = dma_map_page(&sdev->dev, + page, 0x0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} +#endif /* SCIF_MAP_H */ diff --git a/kernel/drivers/misc/mic/scif/scif_mmap.c b/kernel/drivers/misc/mic/scif/scif_mmap.c new file mode 100644 index 000000000..49cb8f7b4 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_mmap.c @@ -0,0 +1,699 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" + +/* + * struct scif_vma_info - Information about a remote memory mapping + * created via scif_mmap(..) + * @vma: VM area struct + * @list: link to list of active vmas + */ +struct scif_vma_info { + struct vm_area_struct *vma; + struct list_head list; +}; + +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = recv_window->prot; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (window && !window->ref_count) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/* + * Remove valid remote memory mappings created via scif_mmap(..) from the + * process address space since the remote node is lost + */ +static void __scif_zap_mmaps(struct scif_endpt *ep) +{ + struct list_head *item; + struct scif_vma_info *info; + struct vm_area_struct *vma; + unsigned long size; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + vma = info->vma; + size = vma->vm_end - vma->vm_start; + zap_vma_ptes(vma, vma->vm_start, size); + dev_dbg(scif_info.mdev.this_device, + "%s ep %p zap vma %p size 0x%lx\n", + __func__, ep, info->vma, size); + } + spin_unlock(&ep->lock); +} + +/* + * Traverse the list of endpoints for a particular remote node and + * zap valid remote memory mappings since the remote node is lost + */ +static void _scif_zap_mmaps(int node, struct list_head *head) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.connlock); + list_for_each(item, head) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev->node == node) + __scif_zap_mmaps(ep); + } + mutex_unlock(&scif_info.connlock); +} + +/* + * Wrapper for removing remote memory mappings for a particular node. This API + * is called by peer nodes as part of handling a lost node. + */ +void scif_zap_mmaps(int node) +{ + _scif_zap_mmaps(node, &scif_info.connected); + _scif_zap_mmaps(node, &scif_info.disconnected); +} + +/* + * This API is only called while handling a lost node: + * a) Remote node is dead. + * b) Remote memory mappings have been zapped + * So we can traverse the remote_reg_list without any locks. Since + * the window has not yet been unregistered we can drop the ref count + * and queue it to the cleanup thread. + */ +static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep) +{ + struct list_head *pos, *tmp; + struct scif_window *window; + + list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(pos, struct scif_window, list); + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(scif_info.mdev.this_device, + "%s %d unexpected\n", + __func__, __LINE__); + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma); + } + } +} + +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.eplock); + list_for_each(item, &scif_info.zombie) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev && ep->remote_dev->node == node) + __scif_cleanup_rma_for_zombies(ep); + } + mutex_unlock(&scif_info.eplock); + flush_work(&scif_info.misc_work); +} + +/* Insert the VMA into the per endpoint VMA list */ +static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct scif_vma_info *info; + int err = 0; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto done; + } + info->vma = vma; + spin_lock(&ep->lock); + list_add_tail(&info->list, &ep->rma_info.vma_list); + spin_unlock(&ep->lock); +done: + return err; +} + +/* Delete the VMA from the per endpoint VMA list */ +static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct list_head *item; + struct scif_vma_info *info; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + if (info->vma == vma) { + list_del(&info->list); + kfree(info); + break; + } + } + spin_unlock(&ep->lock); +} + +static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep) +{ + struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev; + struct scif_hw_dev *sdev = scifdev->sdev; + phys_addr_t out_phys, apt_base = 0; + + /* + * If the DMA address is card relative then we need to add the + * aperture base for mmap to work correctly + */ + if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da) + apt_base = sdev->aper->pa; + out_phys = apt_base + phys; + return out_phys; +} + +int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, + struct scif_range **pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req req; + struct scif_window *window = NULL; + int nr_pages, err, i; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + err = scif_verify_epd(ep); + if (err) + return err; + + if (!len || (offset < 0) || + (offset + len < offset) || + (ALIGN(offset, PAGE_SIZE) != offset) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_SINGLE; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + + /* Allocate scif_range */ + *pages = kzalloc(sizeof(**pages), GFP_KERNEL); + if (!*pages) { + err = -ENOMEM; + goto error; + } + + /* Allocate phys addr array */ + (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)); + if (!((*pages)->phys_addr)) { + err = -ENOMEM; + goto error; + } + + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) { + /* Allocate virtual address array */ + ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *))); + if (!(*pages)->va) { + err = -ENOMEM; + goto error; + } + } + /* Populate the values */ + (*pages)->cookie = window; + (*pages)->nr_pages = nr_pages; + (*pages)->prot_flags = window->prot; + + for (i = 0; i < nr_pages; i++) { + (*pages)->phys_addr[i] = + __scif_off_to_dma_addr(window, offset + + (i * PAGE_SIZE)); + (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i], + ep); + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) + (*pages)->va[i] = + ep->remote_dev->sdev->aper->va + + (*pages)->phys_addr[i] - + ep->remote_dev->sdev->aper->pa; + } + + scif_get_window(window, nr_pages); +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + if (*pages) { + scif_free((*pages)->phys_addr, + nr_pages * sizeof(dma_addr_t)); + scif_free((*pages)->va, + nr_pages * sizeof(void *)); + kfree(*pages); + *pages = NULL; + } + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + } + return err; +} +EXPORT_SYMBOL_GPL(scif_get_pages); + +int scif_put_pages(struct scif_range *pages) +{ + struct scif_endpt *ep; + struct scif_window *window; + struct scifmsg msg; + + if (!pages || !pages->cookie) + return -EINVAL; + + window = pages->cookie; + + if (!window || window->magic != SCIFEP_MAGIC) + return -EINVAL; + + ep = (struct scif_endpt *)window->ep; + /* + * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the + * callee should be allowed to release references to the pages, + * else the endpoint was not connected in the first place, + * hence the ENOTCONN. + */ + if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED) + return -ENOTCONN; + + mutex_lock(&ep->rma_info.rma_lock); + + scif_put_window(window, pages->nr_pages); + + /* Initiate window destruction if ref count is zero */ + if (!window->ref_count) { + list_del(&window->list); + mutex_unlock(&ep->rma_info.rma_lock); + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this window being destroyed. */ + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + msg.payload[0] = window->peer_window; + /* No error handling for notification messages */ + scif_nodeqp_send(ep->remote_dev, &msg); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } else { + mutex_unlock(&ep->rma_info.rma_lock); + } + + scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t)); + scif_free(pages->va, pages->nr_pages * sizeof(void *)); + kfree(pages); + return 0; +} +EXPORT_SYMBOL_GPL(scif_put_pages); + +/* + * scif_rma_list_mmap: + * + * Traverse the remote registration list starting from start_window: + * 1) Create VtoP mappings via remap_pfn_range(..) + * 2) Once step 1) and 2) complete successfully then traverse the range of + * windows again and bump the reference count. + * RMA lock must be held. + */ +static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset, + int nr_pages, struct vm_area_struct *vma) +{ + s64 end_offset, loop_offset = offset; + struct scif_window *window = start_window; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + int i, err = 0; + dma_addr_t phys_addr; + struct scif_window_iter src_win_iter; + size_t contig_bytes = 0; + + might_sleep(); + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_init_window_iter(window, &src_win_iter); + for (i = 0; i < loop_nr_pages; i++) { + phys_addr = scif_off_to_dma_addr(window, loop_offset, + &contig_bytes, + &src_win_iter); + phys_addr = scif_get_phys(phys_addr, ep); + err = remap_pfn_range(vma, + vma->vm_start + + loop_offset - offset, + phys_addr >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + if (err) + goto error; + loop_offset += PAGE_SIZE; + } + nr_pages_left -= loop_nr_pages; + if (!nr_pages_left) + break; + } + /* + * No more failures expected. Bump up the ref count for all + * the windows. Another traversal from start_window required + * for handling errors encountered across windows during + * remap_pfn_range(..). + */ + loop_offset = offset; + nr_pages_left = nr_pages; + window = start_window; + head = &ep->rma_info.remote_reg_list; + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_get_window(window, loop_nr_pages); + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +error: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +/* + * scif_rma_list_munmap: + * + * Traverse the remote registration list starting from window: + * 1) Decrement ref count. + * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer. + * RMA lock must be held. + */ +static void scif_rma_list_munmap(struct scif_window *start_window, + s64 offset, int nr_pages) +{ + struct scifmsg msg; + s64 loop_offset = offset, end_offset; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + struct scif_window *window = start_window, *_window; + + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + loop_offset = offset; + nr_pages_left = nr_pages; + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_put_window(window, loop_nr_pages); + if (!window->ref_count) { + struct scif_dev *rdev = ep->remote_dev; + + scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this munmap */ + msg.payload[0] = window->peer_window; + /* No error handling for Notification messages. */ + scif_nodeqp_send(ep->remote_dev, &msg); + list_del(&window->list); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +} + +/* + * The private data field of each VMA used to mmap a remote window + * points to an instance of struct vma_pvt + */ +struct vma_pvt { + struct scif_endpt *ep; /* End point for remote window */ + s64 offset; /* offset within remote window */ + bool valid_offset; /* offset is valid only if the original + * mmap request was for a single page + * else the offset within the vma is + * the correct offset + */ + struct kref ref; +}; + +static void vma_pvt_release(struct kref *ref) +{ + struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); + + kfree(vmapvt); +} + +/** + * scif_vma_open - VMA open driver callback + * @vma: VMM memory area. + * The open method is called by the kernel to allow the subsystem implementing + * the VMA to initialize the area. This method is invoked any time a new + * reference to the VMA is made (when a process forks, for example). + * The one exception happens when the VMA is first created by mmap; + * in this case, the driver's mmap method is called instead. + * This function is also invoked when an existing VMA is split by the kernel + * due to a call to munmap on a subset of the VMA resulting in two VMAs. + * The kernel invokes this function only on one of the two VMAs. + */ +static void scif_vma_open(struct vm_area_struct *vma) +{ + struct vma_pvt *vmapvt = vma->vm_private_data; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + scif_insert_vma(vmapvt->ep, vma); + kref_get(&vmapvt->ref); +} + +/** + * scif_munmap - VMA close driver callback. + * @vma: VMM memory area. + * When an area is destroyed, the kernel calls its close operation. + * Note that there's no usage count associated with VMA's; the area + * is opened and closed exactly once by each process that uses it. + */ +static void scif_munmap(struct vm_area_struct *vma) +{ + struct scif_endpt *ep; + struct vma_pvt *vmapvt = vma->vm_private_data; + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + s64 offset; + struct scif_rma_req req; + struct scif_window *window = NULL; + int err; + + might_sleep(); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + ep = vmapvt->ep; + offset = vmapvt->valid_offset ? vmapvt->offset : + (vma->vm_pgoff) << PAGE_SHIFT; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n", + ep, nr_pages, offset); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + + err = scif_query_window(&req); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + else + scif_rma_list_munmap(window, offset, nr_pages); + + mutex_unlock(&ep->rma_info.rma_lock); + /* + * The kernel probably zeroes these out but we still want + * to clean up our own mess just in case. + */ + vma->vm_ops = NULL; + vma->vm_private_data = NULL; + kref_put(&vmapvt->ref, vma_pvt_release); + scif_delete_vma(ep, vma); +} + +static const struct vm_operations_struct scif_vm_ops = { + .open = scif_vma_open, + .close = scif_munmap, +}; + +/** + * scif_mmap - Map pages in virtual address space to a remote window. + * @vma: VMM memory area. + * @epd: endpoint descriptor + * + * Return: Upon successful completion, scif_mmap() returns zero + * else an apt error is returned as documented in scif.h + */ +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 start_offset = vma->vm_pgoff << PAGE_SHIFT; + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int err; + struct vma_pvt *vmapvt; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n", + ep, start_offset, nr_pages); + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + + err = scif_insert_vma(ep, vma); + if (err) + return err; + + vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL); + if (!vmapvt) { + scif_delete_vma(ep, vma); + return -ENOMEM; + } + + vmapvt->ep = ep; + kref_init(&vmapvt->ref); + + req.out_window = &window; + req.offset = start_offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + + /* Default prot for loopback */ + if (!scifdev_self(ep->remote_dev)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + /* + * VM_DONTCOPY - Do not copy this vma on fork + * VM_DONTEXPAND - Cannot expand with mremap() + * VM_RESERVED - Count as reserved_vm like IO + * VM_PFNMAP - Page-ranges managed without "struct page" + * VM_IO - Memory mapped I/O or similar + * + * We do not want to copy this VMA automatically on a fork(), + * expand this VMA due to mremap() or swap out these pages since + * the VMA is actually backed by physical pages in the remote + * node's physical memory and not via a struct page. + */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; + + if (!scifdev_self(ep->remote_dev)) + vma->vm_flags |= VM_IO | VM_PFNMAP; + + /* Map this range of windows */ + err = scif_rma_list_mmap(window, start_offset, nr_pages, vma); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + /* Set up the driver call back */ + vma->vm_ops = &scif_vm_ops; + vma->vm_private_data = vmapvt; +error_unlock: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + kfree(vmapvt); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_delete_vma(ep, vma); + } + return err; +} diff --git a/kernel/drivers/misc/mic/scif/scif_nm.c b/kernel/drivers/misc/mic/scif/scif_nm.c new file mode 100644 index 000000000..79f26a02a --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_nm.c @@ -0,0 +1,237 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_peer_bus.h" + +#include "scif_main.h" +#include "scif_map.h" + +/** + * scif_invalidate_ep() - Set state for all connected endpoints + * to disconnected and wake up all send/recv waitqueues + */ +static void scif_invalidate_ep(int node) +{ + struct scif_endpt *ep; + struct list_head *pos, *tmpq; + + flush_work(&scif_info.conn_work); + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->remote_dev->node == node) { + scif_unmap_all_windows(ep); + spin_lock(&ep->lock); + scif_cleanup_ep_qp(ep); + spin_unlock(&ep->lock); + } + } + list_for_each_safe(pos, tmpq, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->remote_dev->node == node) { + list_del(pos); + spin_lock(&ep->lock); + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &scif_info.disconnected); + scif_cleanup_ep_qp(ep); + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); + scif_unmap_all_windows(ep); + } + } + mutex_unlock(&scif_info.connlock); +} + +void scif_free_qp(struct scif_dev *scifdev) +{ + struct scif_qp *qp = scifdev->qpairs; + + if (!qp) + return; + scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size); + kfree(qp->inbound_q.rb_base); + scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); + kfree(scifdev->qpairs); + scifdev->qpairs = NULL; +} + +static void scif_cleanup_qp(struct scif_dev *dev) +{ + struct scif_qp *qp = &dev->qpairs[0]; + + if (!qp) + return; + scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev); + scif_iounmap((void *)qp->outbound_q.rb_base, + sizeof(struct scif_qp), dev); + qp->remote_qp = NULL; + qp->local_write = 0; + qp->inbound_q.current_write_offset = 0; + qp->inbound_q.current_read_offset = 0; + if (scifdev_is_p2p(dev)) + scif_free_qp(dev); +} + +void scif_send_acks(struct scif_dev *dev) +{ + struct scifmsg msg; + + if (dev->node_remove_ack_pending) { + msg.uop = SCIF_NODE_REMOVE_ACK; + msg.src.node = scif_info.nodeid; + msg.dst.node = SCIF_MGMT_NODE; + msg.payload[0] = dev->node; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg); + dev->node_remove_ack_pending = false; + } + if (dev->exit_ack_pending) { + msg.uop = SCIF_EXIT_ACK; + msg.src.node = scif_info.nodeid; + msg.dst.node = dev->node; + scif_nodeqp_send(dev, &msg); + dev->exit_ack_pending = false; + } +} + +/* + * scif_cleanup_scifdev + * + * @dev: Remote SCIF device. + * Uninitialize SCIF data structures for remote SCIF device. + */ +void scif_cleanup_scifdev(struct scif_dev *dev) +{ + struct scif_hw_dev *sdev = dev->sdev; + + if (!dev->sdev) + return; + if (scifdev_is_p2p(dev)) { + if (dev->cookie) { + sdev->hw_ops->free_irq(sdev, dev->cookie, dev); + dev->cookie = NULL; + } + scif_destroy_intr_wq(dev); + } + flush_work(&scif_info.misc_work); + scif_destroy_p2p(dev); + scif_invalidate_ep(dev->node); + scif_zap_mmaps(dev->node); + scif_cleanup_rma_for_zombies(dev->node); + flush_work(&scif_info.misc_work); + scif_send_acks(dev); + if (!dev->node && scif_info.card_initiated_exit) { + /* + * Send an SCIF_EXIT message which is the last message from MIC + * to the Host and wait for a SCIF_EXIT_ACK + */ + scif_send_exit(dev); + scif_info.card_initiated_exit = false; + } + scif_cleanup_qp(dev); +} + +/* + * scif_remove_node: + * + * @node: Node to remove + */ +void scif_handle_remove_node(int node) +{ + struct scif_dev *scifdev = &scif_dev[node]; + + if (scif_peer_unregister_device(scifdev)) + scif_send_acks(scifdev); +} + +static int scif_send_rmnode_msg(int node, int remove_node) +{ + struct scifmsg notif_msg; + struct scif_dev *dev = &scif_dev[node]; + + notif_msg.uop = SCIF_NODE_REMOVE; + notif_msg.src.node = scif_info.nodeid; + notif_msg.dst.node = node; + notif_msg.payload[0] = remove_node; + return scif_nodeqp_send(dev, ¬if_msg); +} + +/** + * scif_node_disconnect: + * + * @node_id[in]: source node id. + * @mgmt_initiated: Disconnection initiated from the mgmt node + * + * Disconnect a node from the scif network. + */ +void scif_disconnect_node(u32 node_id, bool mgmt_initiated) +{ + int ret; + int msg_cnt = 0; + u32 i = 0; + struct scif_dev *scifdev = &scif_dev[node_id]; + + if (!node_id) + return; + + atomic_set(&scifdev->disconn_rescnt, 0); + + /* Destroy p2p network */ + for (i = 1; i <= scif_info.maxid; i++) { + if (i == node_id) + continue; + ret = scif_send_rmnode_msg(i, node_id); + if (!ret) + msg_cnt++; + } + /* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */ + ret = wait_event_timeout(scifdev->disconn_wq, + (atomic_read(&scifdev->disconn_rescnt) + == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT); + /* Tell the card to clean up */ + if (mgmt_initiated && _scifdev_alive(scifdev)) + /* + * Send an SCIF_EXIT message which is the last message from Host + * to the MIC and wait for a SCIF_EXIT_ACK + */ + scif_send_exit(scifdev); + atomic_set(&scifdev->disconn_rescnt, 0); + /* Tell the mgmt node to clean up */ + ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id); + if (!ret) + /* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */ + wait_event_timeout(scifdev->disconn_wq, + (atomic_read(&scifdev->disconn_rescnt) == 1), + SCIF_NODE_ALIVE_TIMEOUT); +} + +void scif_get_node_info(void) +{ + struct scifmsg msg; + DECLARE_COMPLETION_ONSTACK(node_info); + + msg.uop = SCIF_GET_NODE_INFO; + msg.src.node = scif_info.nodeid; + msg.dst.node = SCIF_MGMT_NODE; + msg.payload[3] = (u64)&node_info; + + if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg))) + return; + + /* Wait for a response with SCIF_GET_NODE_INFO */ + wait_for_completion(&node_info); +} diff --git a/kernel/drivers/misc/mic/scif/scif_nodeqp.c b/kernel/drivers/misc/mic/scif/scif_nodeqp.c new file mode 100644 index 000000000..c66ca1a58 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_nodeqp.c @@ -0,0 +1,1354 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" +#include "scif_main.h" +#include "scif_nodeqp.h" +#include "scif_map.h" + +/* + ************************************************************************ + * SCIF node Queue Pair (QP) setup flow: + * + * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus + * 2) scif_setup_qp(..) allocates the local qp and calls + * scif_setup_qp_connect(..) which allocates and maps the local + * buffer for the inbound QP + * 3) The local node updates the device page with the DMA address of the QP + * 4) A delayed work is scheduled (qp_dwork) which periodically reads if + * the peer node has updated its QP DMA address + * 5) Once a valid non zero address is found in the QP DMA address field + * in the device page, the local node maps the remote node's QP, + * updates its outbound QP and sends a SCIF_INIT message to the peer + * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom + * half handler by calling scif_init(..) + * 7) scif_init(..) registers a new SCIF peer node by calling + * scif_peer_register_device(..) which signifies the addition of a new + * SCIF node + * 8) On the mgmt node, P2P network setup/teardown is initiated if all the + * remote nodes are online via scif_p2p_setup(..) + * 9) For P2P setup, the host maps the remote nodes' aperture and memory + * bars and sends a SCIF_NODE_ADD message to both nodes + * 10) As part of scif_nodeadd, both nodes set up their local inbound + * QPs and send a SCIF_NODE_ADD_ACK to the mgmt node + * 11) As part of scif_node_add_ack(..) the mgmt node forwards the + * SCIF_NODE_ADD_ACK to the remote nodes + * 12) As part of scif_node_add_ack(..) the remote nodes update their + * outbound QPs, make sure they can access memory on the remote node + * and then add a new SCIF peer node by calling + * scif_peer_register_device(..) which signifies the addition of a new + * SCIF node. + * 13) The SCIF network is now established across all nodes. + * + ************************************************************************ + * SCIF node QP teardown flow (initiated by non mgmt node): + * + * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus + * 2) The device page QP DMA address field is updated with 0x0 + * 3) A non mgmt node now cleans up all local data structures and sends a + * SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK + * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called + * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the + * peers and waits for a SCIF_NODE_REMOVE_ACK + * 6) As part of scif_node_remove(..) a remote node unregisters the peer + * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK + * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs + * it sends itself a node remove message whose handling cleans up local + * data structures and unregisters the peer node from the SCIF network + * 8) The mgmt node sends a SCIF_EXIT_ACK + * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown + * completes the SCIF remove routine + * 10) The SCIF network is now torn down for the node initiating the + * teardown sequence + * + ************************************************************************ + * SCIF node QP teardown flow (initiated by mgmt node): + * + * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus + * 2) The device page QP DMA address field is updated with 0x0 + * 3) The mgmt node calls scif_disconnect_node(..) + * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers + * and waits for a SCIF_NODE_REMOVE_ACK + * 5) As part of scif_node_remove(..) a remote node unregisters the peer + * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK + * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs + * it unregisters the peer node from the SCIF network + * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK. + * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..) + * which would clean up local data structures for all SCIF nodes and + * then send a SCIF_EXIT_ACK back to the mgmt node + * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node + * remove message whose handling cleans up local data structures and + * destroys any P2P mappings. + * 10) The SCIF hardware device for which a remove callback was received is now + * disconnected from the SCIF network. + */ +/* + * Initializes "local" data structures for the QP. Allocates the QP + * ring buffer (rb) and initializes the "in bound" queue. + */ +int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, + int local_size, struct scif_dev *scifdev) +{ + void *local_q = qp->inbound_q.rb_base; + int err = 0; + u32 tmp_rd = 0; + + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + + /* Allocate rb only if not already allocated */ + if (!local_q) { + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + return err; + } + } + + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); + if (err) + goto kfree; + /* + * To setup the inbound_q, the buffer lives locally, the read pointer + * is remote and the write pointer is local. + */ + scif_rb_init(&qp->inbound_q, + &tmp_rd, + &qp->local_write, + local_q, get_count_order(local_size)); + /* + * The read pointer is NULL initially and it is unsafe to use the ring + * buffer til this changes! + */ + qp->inbound_q.read_ptr = NULL; + err = scif_map_single(qp_offset, qp, + scifdev, sizeof(struct scif_qp)); + if (err) + goto unmap; + qp->local_qp = *qp_offset; + return err; +unmap: + scif_unmap_single(qp->local_buf, scifdev, local_size); + qp->local_buf = 0; +kfree: + kfree(local_q); + return err; +} + +/* When the other side has already done it's allocation, this is called */ +int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset, + dma_addr_t phys, int local_size, + struct scif_dev *scifdev) +{ + void *local_q; + void *remote_q; + struct scif_qp *remote_qp; + int remote_size; + int err = 0; + + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + /* Start by figuring out where we need to point */ + remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev); + if (!remote_qp) + return -EIO; + qp->remote_qp = remote_qp; + if (qp->remote_qp->magic != SCIFEP_MAGIC) { + err = -EIO; + goto iounmap; + } + qp->remote_buf = remote_qp->local_buf; + remote_size = qp->remote_qp->inbound_q.size; + remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev); + if (!remote_q) { + err = -EIO; + goto iounmap; + } + qp->remote_qp->local_write = 0; + /* + * To setup the outbound_q, the buffer lives in remote memory, + * the read pointer is local, the write pointer is remote + */ + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->remote_qp->local_write, + remote_q, + get_count_order(remote_size)); + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + goto iounmap_1; + } + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); + if (err) + goto kfree; + qp->remote_qp->local_read = 0; + /* + * To setup the inbound_q, the buffer lives locally, the read pointer + * is remote and the write pointer is local + */ + scif_rb_init(&qp->inbound_q, + &qp->remote_qp->local_read, + &qp->local_write, + local_q, get_count_order(local_size)); + err = scif_map_single(qp_offset, qp, scifdev, + sizeof(struct scif_qp)); + if (err) + goto unmap; + qp->local_qp = *qp_offset; + return err; +unmap: + scif_unmap_single(qp->local_buf, scifdev, local_size); + qp->local_buf = 0; +kfree: + kfree(local_q); +iounmap_1: + scif_iounmap(remote_q, remote_size, scifdev); + qp->outbound_q.rb_base = NULL; +iounmap: + scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev); + qp->remote_qp = NULL; + return err; +} + +int scif_setup_qp_connect_response(struct scif_dev *scifdev, + struct scif_qp *qp, u64 payload) +{ + int err = 0; + void *r_buf; + int remote_size; + phys_addr_t tmp_phys; + + qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev); + + if (!qp->remote_qp) { + err = -ENOMEM; + goto error; + } + + if (qp->remote_qp->magic != SCIFEP_MAGIC) { + dev_err(&scifdev->sdev->dev, + "SCIFEP_MAGIC mismatch between self %d remote %d\n", + scif_dev[scif_info.nodeid].node, scifdev->node); + err = -ENODEV; + goto error; + } + + tmp_phys = qp->remote_qp->local_buf; + remote_size = qp->remote_qp->inbound_q.size; + r_buf = scif_ioremap(tmp_phys, remote_size, scifdev); + + if (!r_buf) + return -EIO; + + qp->local_read = 0; + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->remote_qp->local_write, + r_buf, + get_count_order(remote_size)); + /* + * Because the node QP may already be processing an INIT message, set + * the read pointer so the cached read offset isn't lost + */ + qp->remote_qp->local_read = qp->inbound_q.current_read_offset; + /* + * resetup the inbound_q now that we know where the + * inbound_read really is. + */ + scif_rb_init(&qp->inbound_q, + &qp->remote_qp->local_read, + &qp->local_write, + qp->inbound_q.rb_base, + get_count_order(qp->inbound_q.size)); +error: + return err; +} + +static __always_inline void +scif_send_msg_intr(struct scif_dev *scifdev) +{ + struct scif_hw_dev *sdev = scifdev->sdev; + + if (scifdev_is_p2p(scifdev)) + sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio); + else + sdev->hw_ops->send_intr(sdev, scifdev->rdb); +} + +int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev) +{ + int err = 0; + struct scifmsg msg; + + err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys); + if (!err) { + /* + * Now that everything is setup and mapped, we're ready + * to tell the peer about our queue's location + */ + msg.uop = SCIF_INIT; + msg.dst.node = scifdev->node; + err = scif_nodeqp_send(scifdev, &msg); + } + return err; +} + +void scif_send_exit(struct scif_dev *scifdev) +{ + struct scifmsg msg; + int ret; + + scifdev->exit = OP_IN_PROGRESS; + msg.uop = SCIF_EXIT; + msg.src.node = scif_info.nodeid; + msg.dst.node = scifdev->node; + ret = scif_nodeqp_send(scifdev, &msg); + if (ret) + goto done; + /* Wait for a SCIF_EXIT_ACK message */ + wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED, + SCIF_NODE_ALIVE_TIMEOUT); +done: + scifdev->exit = OP_IDLE; +} + +int scif_setup_qp(struct scif_dev *scifdev) +{ + int err = 0; + int local_size; + struct scif_qp *qp; + + local_size = SCIF_NODE_QP_SIZE; + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + return err; + } + qp->magic = SCIFEP_MAGIC; + scifdev->qpairs = qp; + err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr, + local_size, scifdev); + if (err) + goto free_qp; + /* + * We're as setup as we can be. The inbound_q is setup, w/o a usable + * outbound q. When we get a message, the read_ptr will be updated, + * and we will pull the message. + */ + return err; +free_qp: + kfree(scifdev->qpairs); + scifdev->qpairs = NULL; + return err; +} + +static void scif_p2p_freesg(struct scatterlist *sg) +{ + kfree(sg); +} + +static struct scatterlist * +scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt) +{ + struct scatterlist *sg; + struct page *page; + int i; + + sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL); + if (!sg) + return NULL; + sg_init_table(sg, page_cnt); + for (i = 0; i < page_cnt; i++) { + page = pfn_to_page(pa >> PAGE_SHIFT); + sg_set_page(&sg[i], page, page_size, 0); + pa += page_size; + } + return sg; +} + +/* Init p2p mappings required to access peerdev from scifdev */ +static struct scif_p2p_info * +scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev) +{ + struct scif_p2p_info *p2p; + int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks; + struct scif_hw_dev *psdev = peerdev->sdev; + struct scif_hw_dev *sdev = scifdev->sdev; + + num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT; + num_aper_pages = psdev->aper->len >> PAGE_SHIFT; + + p2p = kzalloc(sizeof(*p2p), GFP_KERNEL); + if (!p2p) + return NULL; + p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa, + PAGE_SIZE, num_mmio_pages); + if (!p2p->ppi_sg[SCIF_PPI_MMIO]) + goto free_p2p; + p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages; + sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30))); + num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT); + p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa, + 1 << sg_page_shift, + num_aper_chunks); + p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks; + err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + num_mmio_pages, PCI_DMA_BIDIRECTIONAL); + if (err != num_mmio_pages) + goto scif_p2p_free; + err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + num_aper_chunks, PCI_DMA_BIDIRECTIONAL); + if (err != num_aper_chunks) + goto dma_unmap; + p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]); + p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]); + p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages; + p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages; + p2p->ppi_peer_id = peerdev->node; + return p2p; +dma_unmap: + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL); +scif_p2p_free: + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); +free_p2p: + kfree(p2p); + return NULL; +} + +/* Uninitialize and release resources from a p2p mapping */ +static void scif_deinit_p2p_info(struct scif_dev *scifdev, + struct scif_p2p_info *p2p) +{ + struct scif_hw_dev *sdev = scifdev->sdev; + + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL); + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + kfree(p2p); +} + +/** + * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message + * @dst: Destination node + * + * Connect the src and dst node by setting up the p2p connection + * between them. Management node here acts like a proxy. + */ +static void scif_node_connect(struct scif_dev *scifdev, int dst) +{ + struct scif_dev *dev_j = scifdev; + struct scif_dev *dev_i = NULL; + struct scif_p2p_info *p2p_ij = NULL; /* bus addr for j from i */ + struct scif_p2p_info *p2p_ji = NULL; /* bus addr for i from j */ + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + struct scifmsg msg; + int err; + u64 tmppayload; + + if (dst < 1 || dst > scif_info.maxid) + return; + + dev_i = &scif_dev[dst]; + + if (!_scifdev_alive(dev_i)) + return; + /* + * If the p2p connection is already setup or in the process of setting + * up then just ignore this request. The requested node will get + * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK + */ + if (!list_empty(&dev_i->p2p)) { + list_for_each_safe(pos, tmp, &dev_i->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + if (p2p->ppi_peer_id == dev_j->node) + return; + } + } + p2p_ij = scif_init_p2p_info(dev_i, dev_j); + if (!p2p_ij) + return; + p2p_ji = scif_init_p2p_info(dev_j, dev_i); + if (!p2p_ji) { + scif_deinit_p2p_info(dev_i, p2p_ij); + return; + } + list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p); + list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p); + + /* + * Send a SCIF_NODE_ADD to dev_i, pass it its bus address + * as seen from dev_j + */ + msg.uop = SCIF_NODE_ADD; + msg.src.node = dev_j->node; + msg.dst.node = dev_i->node; + + msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER]; + msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO]; + msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER]; + msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT; + + err = scif_nodeqp_send(dev_i, &msg); + if (err) { + dev_err(&scifdev->sdev->dev, + "%s %d error %d\n", __func__, __LINE__, err); + return; + } + + /* Same as above but to dev_j */ + msg.uop = SCIF_NODE_ADD; + msg.src.node = dev_i->node; + msg.dst.node = dev_j->node; + + tmppayload = msg.payload[0]; + msg.payload[0] = msg.payload[2]; + msg.payload[2] = tmppayload; + msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO]; + msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT; + + scif_nodeqp_send(dev_j, &msg); +} + +static void scif_p2p_setup(void) +{ + int i, j; + + if (!scif_info.p2p_enable) + return; + + for (i = 1; i <= scif_info.maxid; i++) + if (!_scifdev_alive(&scif_dev[i])) + return; + + for (i = 1; i <= scif_info.maxid; i++) { + for (j = 1; j <= scif_info.maxid; j++) { + struct scif_dev *scifdev = &scif_dev[i]; + + if (i == j) + continue; + scif_node_connect(scifdev, j); + } + } +} + +static char *message_types[] = {"BAD", + "INIT", + "EXIT", + "SCIF_EXIT_ACK", + "SCIF_NODE_ADD", + "SCIF_NODE_ADD_ACK", + "SCIF_NODE_ADD_NACK", + "REMOVE_NODE", + "REMOVE_NODE_ACK", + "CNCT_REQ", + "CNCT_GNT", + "CNCT_GNTACK", + "CNCT_GNTNACK", + "CNCT_REJ", + "DISCNCT", + "DISCNT_ACK", + "CLIENT_SENT", + "CLIENT_RCVD", + "SCIF_GET_NODE_INFO", + "REGISTER", + "REGISTER_ACK", + "REGISTER_NACK", + "UNREGISTER", + "UNREGISTER_ACK", + "UNREGISTER_NACK", + "ALLOC_REQ", + "ALLOC_GNT", + "ALLOC_REJ", + "FREE_PHYS", + "FREE_VIRT", + "MUNMAP", + "MARK", + "MARK_ACK", + "MARK_NACK", + "WAIT", + "WAIT_ACK", + "WAIT_NACK", + "SIGNAL_LOCAL", + "SIGNAL_REMOTE", + "SIG_ACK", + "SIG_NACK"}; + +static void +scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, + const char *label) +{ + if (!scif_info.en_msg_log) + return; + if (msg->uop > SCIF_MAX_MSG) { + dev_err(&scifdev->sdev->dev, + "%s: unknown msg type %d\n", label, msg->uop); + return; + } + dev_info(&scifdev->sdev->dev, + "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n", + label, message_types[msg->uop], msg->src.node, msg->src.port, + msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1], + msg->payload[2], msg->payload[3]); +} + +int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_qp *qp = scifdev->qpairs; + int err = -ENOMEM, loop_cnt = 0; + + scif_display_message(scifdev, msg, "Sent"); + if (!qp) { + err = -EINVAL; + goto error; + } + spin_lock(&qp->send_lock); + + while ((err = scif_rb_write(&qp->outbound_q, + msg, sizeof(struct scifmsg)))) { + mdelay(1); +#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000) + if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) { + err = -ENODEV; + break; + } + } + if (!err) + scif_rb_commit(&qp->outbound_q); + spin_unlock(&qp->send_lock); + if (!err) { + if (scifdev_self(scifdev)) + /* + * For loopback we need to emulate an interrupt by + * queuing work for the queue handling real node + * Qp interrupts. + */ + queue_work(scifdev->intr_wq, &scifdev->intr_bh); + else + scif_send_msg_intr(scifdev); + } +error: + if (err) + dev_dbg(&scifdev->sdev->dev, + "%s %d error %d uop %d\n", + __func__, __LINE__, err, msg->uop); + return err; +} + +/** + * scif_nodeqp_send - Send a message on the node queue pair + * @scifdev: Scif Device. + * @msg: The message to be sent. + */ +int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int err; + struct device *spdev = NULL; + + if (msg->uop > SCIF_EXIT_ACK) { + /* Dont send messages once the exit flow has begun */ + if (OP_IDLE != scifdev->exit) + return -ENODEV; + spdev = scif_get_peer_dev(scifdev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + } + err = _scif_nodeqp_send(scifdev, msg); + if (msg->uop > SCIF_EXIT_ACK) + scif_put_peer_dev(spdev); + return err; +} + +/* + * scif_misc_handler: + * + * Work queue handler for servicing miscellaneous SCIF tasks. + * Examples include: + * 1) Remote fence requests. + * 2) Destruction of temporary registered windows + * created during scif_vreadfrom()/scif_vwriteto(). + * 3) Cleanup of zombie endpoints. + */ +void scif_misc_handler(struct work_struct *work) +{ + scif_rma_handle_remote_fences(); + scif_rma_destroy_windows(); + scif_rma_destroy_tcw_invalid(); + scif_cleanup_zombie_epd(); +} + +/** + * scif_init() - Respond to SCIF_INIT interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + */ +static __always_inline void +scif_init(struct scif_dev *scifdev, struct scifmsg *msg) +{ + /* + * Allow the thread waiting for device page updates for the peer QP DMA + * address to complete initializing the inbound_q. + */ + flush_delayed_work(&scifdev->qp_dwork); + + scif_peer_register_device(scifdev); + + if (scif_is_mgmt_node()) { + mutex_lock(&scif_info.conflock); + scif_p2p_setup(); + mutex_unlock(&scif_info.conflock); + } +} + +/** + * scif_exit() - Respond to SCIF_EXIT interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * This function stops the SCIF interface for the node which sent + * the SCIF_EXIT message and starts waiting for that node to + * resetup the queue pair again. + */ +static __always_inline void +scif_exit(struct scif_dev *scifdev, struct scifmsg *unused) +{ + scifdev->exit_ack_pending = true; + if (scif_is_mgmt_node()) + scif_disconnect_node(scifdev->node, false); + else + scif_stop(scifdev); + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); +} + +/** + * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + */ +static __always_inline void +scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused) +{ + scifdev->exit = OP_COMPLETED; + wake_up(&scif_info.exitwq); +} + +/** + * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * When the mgmt node driver has finished initializing a MIC node queue pair it + * marks the node as online. It then looks for all currently online MIC cards + * and send a SCIF_NODE_ADD message to identify the ID of the new card for + * peer to peer initialization + * + * The local node allocates its incoming queue and sends its address in the + * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects" + * this message to the new node + */ +static __always_inline void +scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *newdev; + dma_addr_t qp_offset; + int qp_connect; + struct scif_hw_dev *sdev; + + dev_dbg(&scifdev->sdev->dev, + "Scifdev %d:%d received NODE_ADD msg for node %d\n", + scifdev->node, msg->dst.node, msg->src.node); + dev_dbg(&scifdev->sdev->dev, + "Remote address for this node's aperture %llx\n", + msg->payload[0]); + newdev = &scif_dev[msg->src.node]; + newdev->node = msg->src.node; + newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev; + sdev = newdev->sdev; + + if (scif_setup_intr_wq(newdev)) { + dev_err(&scifdev->sdev->dev, + "failed to setup interrupts for %d\n", msg->src.node); + goto interrupt_setup_error; + } + newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len); + if (!newdev->mmio.va) { + dev_err(&scifdev->sdev->dev, + "failed to map mmio for %d\n", msg->src.node); + goto mmio_map_error; + } + newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL); + if (!newdev->qpairs) + goto qp_alloc_error; + /* + * Set the base address of the remote node's memory since it gets + * added to qp_offset + */ + newdev->base_addr = msg->payload[0]; + + qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset, + SCIF_NODE_QP_SIZE, newdev); + if (qp_connect) { + dev_err(&scifdev->sdev->dev, + "failed to setup qp_connect %d\n", qp_connect); + goto qp_connect_error; + } + + newdev->db = sdev->hw_ops->next_db(sdev); + newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler, + "SCIF_INTR", newdev, + newdev->db); + if (IS_ERR(newdev->cookie)) + goto qp_connect_error; + newdev->qpairs->magic = SCIFEP_MAGIC; + newdev->qpairs->qp_state = SCIF_QP_OFFLINE; + + msg->uop = SCIF_NODE_ADD_ACK; + msg->dst.node = msg->src.node; + msg->src.node = scif_info.nodeid; + msg->payload[0] = qp_offset; + msg->payload[2] = newdev->db; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg); + return; +qp_connect_error: + kfree(newdev->qpairs); + newdev->qpairs = NULL; +qp_alloc_error: + iounmap(newdev->mmio.va); + newdev->mmio.va = NULL; +mmio_map_error: +interrupt_setup_error: + dev_err(&scifdev->sdev->dev, + "node add failed for node %d\n", msg->src.node); + msg->uop = SCIF_NODE_ADD_NACK; + msg->dst.node = msg->src.node; + msg->src.node = scif_info.nodeid; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg); +} + +void scif_poll_qp_state(struct work_struct *work) +{ +#define SCIF_NODE_QP_RETRY 100 +#define SCIF_NODE_QP_TIMEOUT 100 + struct scif_dev *peerdev = container_of(work, struct scif_dev, + p2p_dwork.work); + struct scif_qp *qp = &peerdev->qpairs[0]; + + if (qp->qp_state != SCIF_QP_ONLINE || + qp->remote_qp->qp_state != SCIF_QP_ONLINE) { + if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) { + dev_err(&peerdev->sdev->dev, + "Warning: QP check timeout with state %d\n", + qp->qp_state); + goto timeout; + } + schedule_delayed_work(&peerdev->p2p_dwork, + msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT)); + return; + } + return; +timeout: + dev_err(&peerdev->sdev->dev, + "%s %d remote node %d offline, state = 0x%x\n", + __func__, __LINE__, peerdev->node, qp->qp_state); + qp->remote_qp->qp_state = SCIF_QP_OFFLINE; + scif_peer_unregister_device(peerdev); + scif_cleanup_scifdev(peerdev); +} + +/** + * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this + * message to the mgmt node to confirm the sequence is finished. + * + */ +static __always_inline void +scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *peerdev; + struct scif_qp *qp; + struct scif_dev *dst_dev = &scif_dev[msg->dst.node]; + + dev_dbg(&scifdev->sdev->dev, + "Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n", + scifdev->node, msg->src.node, msg->dst.node); + dev_dbg(&scifdev->sdev->dev, + "payload %llx %llx %llx %llx\n", msg->payload[0], + msg->payload[1], msg->payload[2], msg->payload[3]); + if (scif_is_mgmt_node()) { + /* + * the lock serializes with scif_qp_response_ack. The mgmt node + * is forwarding the NODE_ADD_ACK message from src to dst we + * need to make sure that the dst has already received a + * NODE_ADD for src and setup its end of the qp to dst + */ + mutex_lock(&scif_info.conflock); + msg->payload[1] = scif_info.maxid; + scif_nodeqp_send(dst_dev, msg); + mutex_unlock(&scif_info.conflock); + return; + } + peerdev = &scif_dev[msg->src.node]; + peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev; + peerdev->node = msg->src.node; + + qp = &peerdev->qpairs[0]; + + if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0], + msg->payload[0]))) + goto local_error; + peerdev->rdb = msg->payload[2]; + qp->remote_qp->qp_state = SCIF_QP_ONLINE; + + scif_peer_register_device(peerdev); + + schedule_delayed_work(&peerdev->p2p_dwork, 0); + return; +local_error: + scif_cleanup_scifdev(peerdev); +} + +/** + * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message + * @msg: Interrupt message + * + * SCIF_NODE_ADD failed, so inform the waiting wq. + */ +static __always_inline void +scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + if (scif_is_mgmt_node()) { + struct scif_dev *dst_dev = &scif_dev[msg->dst.node]; + + dev_dbg(&scifdev->sdev->dev, + "SCIF_NODE_ADD_NACK received from %d\n", scifdev->node); + scif_nodeqp_send(dst_dev, msg); + } +} + +/* + * scif_node_remove: Handle SCIF_NODE_REMOVE message + * @msg: Interrupt message + * + * Handle node removal. + */ +static __always_inline void +scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int node = msg->payload[0]; + struct scif_dev *scdev = &scif_dev[node]; + + scdev->node_remove_ack_pending = true; + scif_handle_remove_node(node); +} + +/* + * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message + * @msg: Interrupt message + * + * The peer has acked a SCIF_NODE_REMOVE message. + */ +static __always_inline void +scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *sdev = &scif_dev[msg->payload[0]]; + + atomic_inc(&sdev->disconn_rescnt); + wake_up(&sdev->disconn_wq); +} + +/** + * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message + * @msg: Interrupt message + * + * Retrieve node info i.e maxid and total from the mgmt node. + */ +static __always_inline void +scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + if (scif_is_mgmt_node()) { + swap(msg->dst.node, msg->src.node); + mutex_lock(&scif_info.conflock); + msg->payload[1] = scif_info.maxid; + msg->payload[2] = scif_info.total; + mutex_unlock(&scif_info.conflock); + scif_nodeqp_send(scifdev, msg); + } else { + struct completion *node_info = + (struct completion *)msg->payload[3]; + + mutex_lock(&scif_info.conflock); + scif_info.maxid = msg->payload[1]; + scif_info.total = msg->payload[2]; + complete_all(node_info); + mutex_unlock(&scif_info.conflock); + } +} + +static void +scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg) +{ + /* Bogus Node Qp Message? */ + dev_err(&scifdev->sdev->dev, + "Unknown message 0x%xn scifdev->node 0x%x\n", + msg->uop, scifdev->node); +} + +static void (*scif_intr_func[SCIF_MAX_MSG + 1]) + (struct scif_dev *, struct scifmsg *msg) = { + scif_msg_unknown, /* Error */ + scif_init, /* SCIF_INIT */ + scif_exit, /* SCIF_EXIT */ + scif_exit_ack, /* SCIF_EXIT_ACK */ + scif_node_add, /* SCIF_NODE_ADD */ + scif_node_add_ack, /* SCIF_NODE_ADD_ACK */ + scif_node_add_nack, /* SCIF_NODE_ADD_NACK */ + scif_node_remove, /* SCIF_NODE_REMOVE */ + scif_node_remove_ack, /* SCIF_NODE_REMOVE_ACK */ + scif_cnctreq, /* SCIF_CNCT_REQ */ + scif_cnctgnt, /* SCIF_CNCT_GNT */ + scif_cnctgnt_ack, /* SCIF_CNCT_GNTACK */ + scif_cnctgnt_nack, /* SCIF_CNCT_GNTNACK */ + scif_cnctrej, /* SCIF_CNCT_REJ */ + scif_discnct, /* SCIF_DISCNCT */ + scif_discnt_ack, /* SCIF_DISCNT_ACK */ + scif_clientsend, /* SCIF_CLIENT_SENT */ + scif_clientrcvd, /* SCIF_CLIENT_RCVD */ + scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ + scif_recv_reg, /* SCIF_REGISTER */ + scif_recv_reg_ack, /* SCIF_REGISTER_ACK */ + scif_recv_reg_nack, /* SCIF_REGISTER_NACK */ + scif_recv_unreg, /* SCIF_UNREGISTER */ + scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */ + scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */ + scif_alloc_req, /* SCIF_ALLOC_REQ */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */ + scif_free_virt, /* SCIF_FREE_VIRT */ + scif_recv_munmap, /* SCIF_MUNMAP */ + scif_recv_mark, /* SCIF_MARK */ + scif_recv_mark_resp, /* SCIF_MARK_ACK */ + scif_recv_mark_resp, /* SCIF_MARK_NACK */ + scif_recv_wait, /* SCIF_WAIT */ + scif_recv_wait_resp, /* SCIF_WAIT_ACK */ + scif_recv_wait_resp, /* SCIF_WAIT_NACK */ + scif_recv_sig_local, /* SCIF_SIG_LOCAL */ + scif_recv_sig_remote, /* SCIF_SIG_REMOTE */ + scif_recv_sig_resp, /* SCIF_SIG_ACK */ + scif_recv_sig_resp, /* SCIF_SIG_NACK */ +}; + +/** + * scif_nodeqp_msg_handler() - Common handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * @msg: The message to be handled. + * + * This routine calls the appropriate routine to handle a Node Qp + * message receipt + */ +static int scif_max_msg_id = SCIF_MAX_MSG; + +static void +scif_nodeqp_msg_handler(struct scif_dev *scifdev, + struct scif_qp *qp, struct scifmsg *msg) +{ + scif_display_message(scifdev, msg, "Rcvd"); + + if (msg->uop > (u32)scif_max_msg_id) { + /* Bogus Node Qp Message? */ + dev_err(&scifdev->sdev->dev, + "Unknown message 0x%xn scifdev->node 0x%x\n", + msg->uop, scifdev->node); + return; + } + + scif_intr_func[msg->uop](scifdev, msg); +} + +/** + * scif_nodeqp_intrhandler() - Interrupt handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * + * This routine is triggered by the interrupt mechanism. It reads + * messages from the node queue RB and calls the Node QP Message handling + * routine. + */ +void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp) +{ + struct scifmsg msg; + int read_size; + + do { + read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg)); + if (!read_size) + break; + scif_nodeqp_msg_handler(scifdev, qp, &msg); + /* + * The node queue pair is unmapped so skip the read pointer + * update after receipt of a SCIF_EXIT_ACK + */ + if (SCIF_EXIT_ACK == msg.uop) + break; + scif_rb_update_read_ptr(&qp->inbound_q); + } while (1); +} + +/** + * scif_loopb_wq_handler - Loopback Workqueue Handler. + * @work: loop back work + * + * This work queue routine is invoked by the loopback work queue handler. + * It grabs the recv lock, dequeues any available messages from the head + * of the loopback message list, calls the node QP message handler, + * waits for it to return, then frees up this message and dequeues more + * elements of the list if available. + */ +static void scif_loopb_wq_handler(struct work_struct *unused) +{ + struct scif_dev *scifdev = scif_info.loopb_dev; + struct scif_qp *qp = scifdev->qpairs; + struct scif_loopb_msg *msg; + + do { + msg = NULL; + spin_lock(&qp->recv_lock); + if (!list_empty(&scif_info.loopb_recv_q)) { + msg = list_first_entry(&scif_info.loopb_recv_q, + struct scif_loopb_msg, + list); + list_del(&msg->list); + } + spin_unlock(&qp->recv_lock); + + if (msg) { + scif_nodeqp_msg_handler(scifdev, qp, &msg->msg); + kfree(msg); + } + } while (msg); +} + +/** + * scif_loopb_msg_handler() - Workqueue handler for loopback messages. + * @scifdev: SCIF device + * @qp: Queue pair. + * + * This work queue routine is triggered when a loopback message is received. + * + * We need special handling for receiving Node Qp messages on a loopback SCIF + * device via two workqueues for receiving messages. + * + * The reason we need the extra workqueue which is not required with *normal* + * non-loopback SCIF devices is the potential classic deadlock described below: + * + * Thread A tries to send a message on a loopback SCIF device and blocks since + * there is no space in the RB while it has the send_lock held or another + * lock called lock X for example. + * + * Thread B: The Loopback Node QP message receive workqueue receives the message + * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries + * to grab the send lock again or lock X and deadlocks with Thread A. The RB + * cannot be drained any further due to this classic deadlock. + * + * In order to avoid deadlocks as mentioned above we have an extra level of + * indirection achieved by having two workqueues. + * 1) The first workqueue whose handler is scif_loopb_msg_handler reads + * messages from the Node QP RB, adds them to a list and queues work for the + * second workqueue. + * + * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues + * messages from the list, handles them, frees up the memory and dequeues + * more elements from the list if possible. + */ +int +scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp) +{ + int read_size; + struct scif_loopb_msg *msg; + + do { + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg, + sizeof(struct scifmsg)); + if (read_size != sizeof(struct scifmsg)) { + kfree(msg); + scif_rb_update_read_ptr(&qp->inbound_q); + break; + } + spin_lock(&qp->recv_lock); + list_add_tail(&msg->list, &scif_info.loopb_recv_q); + spin_unlock(&qp->recv_lock); + queue_work(scif_info.loopb_wq, &scif_info.loopb_work); + scif_rb_update_read_ptr(&qp->inbound_q); + } while (read_size == sizeof(struct scifmsg)); + return read_size; +} + +/** + * scif_setup_loopback_qp - One time setup work for Loopback Node Qp. + * @scifdev: SCIF device + * + * Sets up the required loopback workqueues, queue pairs and ring buffers + */ +int scif_setup_loopback_qp(struct scif_dev *scifdev) +{ + int err = 0; + void *local_q; + struct scif_qp *qp; + + err = scif_setup_intr_wq(scifdev); + if (err) + goto exit; + INIT_LIST_HEAD(&scif_info.loopb_recv_q); + snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname), + "SCIF LOOPB %d", scifdev->node); + scif_info.loopb_wq = + alloc_ordered_workqueue(scif_info.loopb_wqname, 0); + if (!scif_info.loopb_wq) { + err = -ENOMEM; + goto destroy_intr; + } + INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler); + /* Allocate Self Qpair */ + scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL); + if (!scifdev->qpairs) { + err = -ENOMEM; + goto destroy_loopb_wq; + } + + qp = scifdev->qpairs; + qp->magic = SCIFEP_MAGIC; + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + + local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + goto free_qpairs; + } + /* + * For loopback the inbound_q and outbound_q are essentially the same + * since the Node sends a message on the loopback interface to the + * outbound_q which is then received on the inbound_q. + */ + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->local_write, + local_q, get_count_order(SCIF_NODE_QP_SIZE)); + + scif_rb_init(&qp->inbound_q, + &qp->local_read, + &qp->local_write, + local_q, get_count_order(SCIF_NODE_QP_SIZE)); + scif_info.nodeid = scifdev->node; + + scif_peer_register_device(scifdev); + + scif_info.loopb_dev = scifdev; + return err; +free_qpairs: + kfree(scifdev->qpairs); +destroy_loopb_wq: + destroy_workqueue(scif_info.loopb_wq); +destroy_intr: + scif_destroy_intr_wq(scifdev); +exit: + return err; +} + +/** + * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp + * @scifdev: SCIF device + * + * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory. + */ +int scif_destroy_loopback_qp(struct scif_dev *scifdev) +{ + scif_peer_unregister_device(scifdev); + destroy_workqueue(scif_info.loopb_wq); + scif_destroy_intr_wq(scifdev); + kfree(scifdev->qpairs->outbound_q.rb_base); + kfree(scifdev->qpairs); + scifdev->sdev = NULL; + scif_info.loopb_dev = NULL; + return 0; +} + +void scif_destroy_p2p(struct scif_dev *scifdev) +{ + struct scif_dev *peer_dev; + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + int bd; + + mutex_lock(&scif_info.conflock); + /* Free P2P mappings in the given node for all its peer nodes */ + list_for_each_safe(pos, tmp, &scifdev->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], + DMA_BIDIRECTIONAL); + dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], + DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + list_del(pos); + kfree(p2p); + } + + /* Free P2P mapping created in the peer nodes for the given node */ + for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) { + peer_dev = &scif_dev[bd]; + list_for_each_safe(pos, tmp, &peer_dev->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + if (p2p->ppi_peer_id == scifdev->node) { + dma_unmap_sg(&peer_dev->sdev->dev, + p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], + DMA_BIDIRECTIONAL); + dma_unmap_sg(&peer_dev->sdev->dev, + p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], + DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + list_del(pos); + kfree(p2p); + } + } + } + mutex_unlock(&scif_info.conflock); +} diff --git a/kernel/drivers/misc/mic/scif/scif_nodeqp.h b/kernel/drivers/misc/mic/scif/scif_nodeqp.h new file mode 100644 index 000000000..958962731 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_nodeqp.h @@ -0,0 +1,221 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_NODEQP +#define SCIF_NODEQP + +#include "scif_rb.h" +#include "scif_peer_bus.h" + +#define SCIF_INIT 1 /* First message sent to the peer node for discovery */ +#define SCIF_EXIT 2 /* Last message from the peer informing intent to exit */ +#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */ +#define SCIF_NODE_ADD 4 /* Tell Online nodes a new node exits */ +#define SCIF_NODE_ADD_ACK 5 /* Confirm to mgmt node sequence is finished */ +#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */ +#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */ +#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */ +#define SCIF_CNCT_REQ 9 /* Phys addr of Request connection to a port */ +#define SCIF_CNCT_GNT 10 /* Phys addr of new Grant connection request */ +#define SCIF_CNCT_GNTACK 11 /* Error type Reject a connection request */ +#define SCIF_CNCT_GNTNACK 12 /* Error type Reject a connection request */ +#define SCIF_CNCT_REJ 13 /* Error type Reject a connection request */ +#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */ +#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */ +#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ +#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ +#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ +#define SCIF_REGISTER 19 /* Tell peer about a new registered window */ +#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */ +#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */ +#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */ +#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */ +#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */ +#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */ +#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */ +#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */ +#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */ +#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */ +#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */ +#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */ +#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */ +#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */ +#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */ +#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */ +#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */ +#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */ +#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */ +#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */ +#define SCIF_MAX_MSG SCIF_SIG_NACK + +/* + * struct scifmsg - Node QP message format + * + * @src: Source information + * @dst: Destination information + * @uop: The message opcode + * @payload: Unique payload format for each message + */ +struct scifmsg { + struct scif_port_id src; + struct scif_port_id dst; + u32 uop; + u64 payload[4]; +} __packed; + +/* + * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request + * the remote note to allocate memory + * + * phys_addr: Physical address of the buffer + * vaddr: Virtual address of the buffer + * size: Size of the buffer + * state: Current state + * allocwq: wait queue for status + */ +struct scif_allocmsg { + dma_addr_t phys_addr; + unsigned long vaddr; + size_t size; + enum scif_msg_state state; + wait_queue_head_t allocwq; +}; + +/* + * struct scif_qp - Node Queue Pair + * + * Interesting structure -- a little difficult because we can only + * write across the PCIe, so any r/w pointer we need to read is + * local. We only need to read the read pointer on the inbound_q + * and read the write pointer in the outbound_q + * + * @magic: Magic value to ensure the peer sees the QP correctly + * @outbound_q: The outbound ring buffer for sending messages + * @inbound_q: The inbound ring buffer for receiving messages + * @local_write: Local write index + * @local_read: Local read index + * @remote_qp: The remote queue pair + * @local_buf: DMA address of local ring buffer + * @local_qp: DMA address of the local queue pair data structure + * @remote_buf: DMA address of remote ring buffer + * @qp_state: QP state i.e. online or offline used for P2P + * @send_lock: synchronize access to outbound queue + * @recv_lock: Synchronize access to inbound queue + */ +struct scif_qp { + u64 magic; +#define SCIFEP_MAGIC 0x5c1f000000005c1fULL + struct scif_rb outbound_q; + struct scif_rb inbound_q; + + u32 local_write __aligned(64); + u32 local_read __aligned(64); + struct scif_qp *remote_qp; + dma_addr_t local_buf; + dma_addr_t local_qp; + dma_addr_t remote_buf; + u32 qp_state; +#define SCIF_QP_OFFLINE 0xdead +#define SCIF_QP_ONLINE 0xc0de + spinlock_t send_lock; + spinlock_t recv_lock; +}; + +/* + * struct scif_loopb_msg - An element in the loopback Node QP message list. + * + * @msg - The SCIF node QP message + * @list - link in the list of messages + */ +struct scif_loopb_msg { + struct scifmsg msg; + struct list_head list; +}; + +int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg); +int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp); +int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp); +int scif_setup_qp(struct scif_dev *scifdev); +int scif_qp_response(phys_addr_t phys, struct scif_dev *dev); +int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, + int local_size, struct scif_dev *scifdev); +int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset, + dma_addr_t phys, int local_size, + struct scif_dev *scifdev); +int scif_setup_qp_connect_response(struct scif_dev *scifdev, + struct scif_qp *qp, u64 payload); +int scif_setup_loopback_qp(struct scif_dev *scifdev); +int scif_destroy_loopback_qp(struct scif_dev *scifdev); +void scif_poll_qp_state(struct work_struct *work); +void scif_destroy_p2p(struct scif_dev *scifdev); +void scif_send_exit(struct scif_dev *scifdev); +static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + struct device *spdev_ret; + + rcu_read_lock(); + spdev = rcu_dereference(scifdev->spdev); + if (spdev) + spdev_ret = get_device(&spdev->dev); + else + spdev_ret = ERR_PTR(-ENODEV); + rcu_read_unlock(); + return spdev_ret; +} + +static inline void scif_put_peer_dev(struct device *dev) +{ + put_device(dev); +} +#endif /* SCIF_NODEQP */ diff --git a/kernel/drivers/misc/mic/scif/scif_peer_bus.c b/kernel/drivers/misc/mic/scif/scif_peer_bus.c new file mode 100644 index 000000000..6ffa3bdbd --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_peer_bus.c @@ -0,0 +1,183 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + */ +#include "scif_main.h" +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" + +static inline struct scif_peer_dev * +dev_to_scif_peer(struct device *dev) +{ + return container_of(dev, struct scif_peer_dev, dev); +} + +struct bus_type scif_peer_bus = { + .name = "scif_peer_bus", +}; + +static void scif_peer_release_dev(struct device *d) +{ + struct scif_peer_dev *sdev = dev_to_scif_peer(d); + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + scif_cleanup_scifdev(scifdev); + kfree(sdev); +} + +static int scif_peer_initialize_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + int ret; + + spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); + if (!spdev) { + ret = -ENOMEM; + goto err; + } + + spdev->dev.parent = scifdev->sdev->dev.parent; + spdev->dev.release = scif_peer_release_dev; + spdev->dnode = scifdev->node; + spdev->dev.bus = &scif_peer_bus; + dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); + + device_initialize(&spdev->dev); + get_device(&spdev->dev); + rcu_assign_pointer(scifdev->spdev, spdev); + + mutex_lock(&scif_info.conflock); + scif_info.total++; + scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); + mutex_unlock(&scif_info.conflock); + return 0; +err: + dev_err(&scifdev->sdev->dev, + "dnode %d: initialize_device rc %d\n", scifdev->node, ret); + return ret; +} + +static int scif_peer_add_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); + char pool_name[16]; + int ret; + + ret = device_add(&spdev->dev); + put_device(&spdev->dev); + if (ret) { + dev_err(&scifdev->sdev->dev, + "dnode %d: peer device_add failed\n", scifdev->node); + goto put_spdev; + } + + scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode); + scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev, + sizeof(struct scif_status), 1, + 0); + if (!scifdev->signal_pool) { + dev_err(&scifdev->sdev->dev, + "dnode %d: dmam_pool_create failed\n", scifdev->node); + ret = -ENOMEM; + goto del_spdev; + } + dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); + return 0; +del_spdev: + device_del(&spdev->dev); +put_spdev: + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + put_device(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return ret; +} + +void scif_add_peer_device(struct work_struct *work) +{ + struct scif_dev *scifdev = container_of(work, struct scif_dev, + peer_add_work); + + scif_peer_add_device(scifdev); +} + +/* + * Peer device registration is split into a device_initialize and a device_add. + * The reason for doing this is as follows: First, peer device registration + * itself cannot be done in the message processing thread and must be delegated + * to another workqueue, otherwise if SCIF client probe, called during peer + * device registration, calls scif_connect(..), it will block the message + * processing thread causing a deadlock. Next, device_initialize is done in the + * "top-half" message processing thread and device_add in the "bottom-half" + * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing + * concurrently with SCIF_INIT message processing is unable to get a reference + * on the peer device, thereby failing the connect request. + */ +void scif_peer_register_device(struct scif_dev *scifdev) +{ + int ret; + + mutex_lock(&scifdev->lock); + ret = scif_peer_initialize_device(scifdev); + if (ret) + goto exit; + schedule_work(&scifdev->peer_add_work); +exit: + mutex_unlock(&scifdev->lock); +} + +int scif_peer_unregister_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + + mutex_lock(&scifdev->lock); + /* Flush work to ensure device register is complete */ + flush_work(&scifdev->peer_add_work); + + /* + * Continue holding scifdev->lock since theoretically unregister_device + * can be called simultaneously from multiple threads + */ + spdev = rcu_dereference(scifdev->spdev); + if (!spdev) { + mutex_unlock(&scifdev->lock); + return -ENODEV; + } + + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + mutex_unlock(&scifdev->lock); + + dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode); + device_unregister(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return 0; +} + +int scif_peer_bus_init(void) +{ + return bus_register(&scif_peer_bus); +} + +void scif_peer_bus_exit(void) +{ + bus_unregister(&scif_peer_bus); +} diff --git a/kernel/drivers/misc/mic/scif/scif_peer_bus.h b/kernel/drivers/misc/mic/scif/scif_peer_bus.h new file mode 100644 index 000000000..a3b8dd2ed --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_peer_bus.h @@ -0,0 +1,31 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + */ +#ifndef _SCIF_PEER_BUS_H_ +#define _SCIF_PEER_BUS_H_ + +#include <linux/device.h> +#include <linux/mic_common.h> +#include <linux/scif.h> + +struct scif_dev; + +void scif_add_peer_device(struct work_struct *work); +void scif_peer_register_device(struct scif_dev *sdev); +int scif_peer_unregister_device(struct scif_dev *scifdev); +int scif_peer_bus_init(void); +void scif_peer_bus_exit(void); +#endif /* _SCIF_PEER_BUS_H */ diff --git a/kernel/drivers/misc/mic/scif/scif_ports.c b/kernel/drivers/misc/mic/scif/scif_ports.c new file mode 100644 index 000000000..594e18d27 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_ports.c @@ -0,0 +1,124 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/idr.h> + +#include "scif_main.h" + +#define SCIF_PORT_COUNT 0x10000 /* Ports available */ + +struct idr scif_ports; + +/* + * struct scif_port - SCIF port information + * + * @ref_cnt - Reference count since there can be multiple endpoints + * created via scif_accept(..) simultaneously using a port. + */ +struct scif_port { + int ref_cnt; +}; + +/** + * __scif_get_port - Reserve a specified port # for SCIF and add it + * to the global list. + * @port : port # to be reserved. + * + * @return : Allocated SCIF port #, or -ENOSPC if port unavailable. + * On memory allocation failure, returns -ENOMEM. + */ +static int __scif_get_port(int start, int end) +{ + int id; + struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC); + + if (!port) + return -ENOMEM; + spin_lock(&scif_info.port_lock); + id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC); + if (id >= 0) + port->ref_cnt++; + spin_unlock(&scif_info.port_lock); + return id; +} + +/** + * scif_rsrv_port - Reserve a specified port # for SCIF. + * @port : port # to be reserved. + * + * @return : Allocated SCIF port #, or -ENOSPC if port unavailable. + * On memory allocation failure, returns -ENOMEM. + */ +int scif_rsrv_port(u16 port) +{ + return __scif_get_port(port, port + 1); +} + +/** + * scif_get_new_port - Get and reserve any port # for SCIF in the range + * SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1. + * + * @return : Allocated SCIF port #, or -ENOSPC if no ports available. + * On memory allocation failure, returns -ENOMEM. + */ +int scif_get_new_port(void) +{ + return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT); +} + +/** + * scif_get_port - Increment the reference count for a SCIF port + * @id : SCIF port + * + * @return : None + */ +void scif_get_port(u16 id) +{ + struct scif_port *port; + + if (!id) + return; + spin_lock(&scif_info.port_lock); + port = idr_find(&scif_ports, id); + if (port) + port->ref_cnt++; + spin_unlock(&scif_info.port_lock); +} + +/** + * scif_put_port - Release a reserved SCIF port + * @id : SCIF port to be released. + * + * @return : None + */ +void scif_put_port(u16 id) +{ + struct scif_port *port; + + if (!id) + return; + spin_lock(&scif_info.port_lock); + port = idr_find(&scif_ports, id); + if (port) { + port->ref_cnt--; + if (!port->ref_cnt) { + idr_remove(&scif_ports, id); + kfree(port); + } + } + spin_unlock(&scif_info.port_lock); +} diff --git a/kernel/drivers/misc/mic/scif/scif_rb.c b/kernel/drivers/misc/mic/scif/scif_rb.c new file mode 100644 index 000000000..637cc4686 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rb.c @@ -0,0 +1,249 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/circ_buf.h> +#include <linux/types.h> +#include <linux/io.h> +#include <linux/errno.h> + +#include "scif_rb.h" + +#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size) +#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size) + +/** + * scif_rb_init - Initializes the ring buffer + * @rb: ring buffer + * @read_ptr: A pointer to the read offset + * @write_ptr: A pointer to the write offset + * @rb_base: A pointer to the base of the ring buffer + * @size: The size of the ring buffer in powers of two + */ +void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr, + void *rb_base, u8 size) +{ + rb->rb_base = rb_base; + rb->size = (1 << size); + rb->read_ptr = read_ptr; + rb->write_ptr = write_ptr; + rb->current_read_offset = *read_ptr; + rb->current_write_offset = *write_ptr; +} + +/* Copies a message to the ring buffer -- handles the wrap around case */ +static void memcpy_torb(struct scif_rb *rb, void *header, + void *msg, u32 size) +{ + u32 size1, size2; + + if (header + size >= rb->rb_base + rb->size) { + /* Need to call two copies if it wraps around */ + size1 = (u32)(rb->rb_base + rb->size - header); + size2 = size - size1; + memcpy_toio((void __iomem __force *)header, msg, size1); + memcpy_toio((void __iomem __force *)rb->rb_base, + msg + size1, size2); + } else { + memcpy_toio((void __iomem __force *)header, msg, size); + } +} + +/* Copies a message from the ring buffer -- handles the wrap around case */ +static void memcpy_fromrb(struct scif_rb *rb, void *header, + void *msg, u32 size) +{ + u32 size1, size2; + + if (header + size >= rb->rb_base + rb->size) { + /* Need to call two copies if it wraps around */ + size1 = (u32)(rb->rb_base + rb->size - header); + size2 = size - size1; + memcpy_fromio(msg, (void __iomem __force *)header, size1); + memcpy_fromio(msg + size1, + (void __iomem __force *)rb->rb_base, size2); + } else { + memcpy_fromio(msg, (void __iomem __force *)header, size); + } +} + +/** + * scif_rb_space - Query space available for writing to the RB + * @rb: ring buffer + * + * Return: size available for writing to RB in bytes. + */ +u32 scif_rb_space(struct scif_rb *rb) +{ + rb->current_read_offset = *rb->read_ptr; + /* + * Update from the HW read pointer only once the peer has exposed the + * new empty slot. This barrier is paired with the memory barrier + * scif_rb_update_read_ptr() + */ + mb(); + return scif_rb_ring_space(rb->current_write_offset, + rb->current_read_offset, rb->size); +} + +/** + * scif_rb_write - Write a message to the RB + * @rb: ring buffer + * @msg: buffer to send the message. Must be at least size bytes long + * @size: the size (in bytes) to be copied to the RB + * + * This API does not block if there isn't enough space in the RB. + * Returns: 0 on success or -ENOMEM on failure + */ +int scif_rb_write(struct scif_rb *rb, void *msg, u32 size) +{ + void *header; + + if (scif_rb_space(rb) < size) + return -ENOMEM; + header = rb->rb_base + rb->current_write_offset; + memcpy_torb(rb, header, msg, size); + /* + * Wait until scif_rb_commit(). Update the local ring + * buffer data, not the shared data until commit. + */ + rb->current_write_offset = + (rb->current_write_offset + size) & (rb->size - 1); + return 0; +} + +/** + * scif_rb_commit - To submit the message to let the peer fetch it + * @rb: ring buffer + */ +void scif_rb_commit(struct scif_rb *rb) +{ + /* + * We must ensure ordering between the all the data committed + * previously before we expose the new message to the peer by + * updating the write_ptr. This write barrier is paired with + * the read barrier in scif_rb_count(..) + */ + wmb(); + ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; +#ifdef CONFIG_INTEL_MIC_CARD + /* + * X100 Si bug: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. + * This way, if ordering is violated for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. + */ + ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset; +#endif +} + +/** + * scif_rb_get - To get next message from the ring buffer + * @rb: ring buffer + * @size: Number of bytes to be read + * + * Return: NULL if no bytes to be read from the ring buffer, otherwise the + * pointer to the next byte + */ +static void *scif_rb_get(struct scif_rb *rb, u32 size) +{ + void *header = NULL; + + if (scif_rb_count(rb, size) >= size) + header = rb->rb_base + rb->current_read_offset; + return header; +} + +/* + * scif_rb_get_next - Read from ring buffer. + * @rb: ring buffer + * @msg: buffer to hold the message. Must be at least size bytes long + * @size: Number of bytes to be read + * + * Return: number of bytes read if available bytes are >= size, otherwise + * returns zero. + */ +u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size) +{ + void *header = NULL; + int read_size = 0; + + header = scif_rb_get(rb, size); + if (header) { + u32 next_cmd_offset = + (rb->current_read_offset + size) & (rb->size - 1); + + read_size = size; + rb->current_read_offset = next_cmd_offset; + memcpy_fromrb(rb, header, msg, size); + } + return read_size; +} + +/** + * scif_rb_update_read_ptr + * @rb: ring buffer + */ +void scif_rb_update_read_ptr(struct scif_rb *rb) +{ + u32 new_offset; + + new_offset = rb->current_read_offset; + /* + * We must ensure ordering between the all the data committed or read + * previously before we expose the empty slot to the peer by updating + * the read_ptr. This barrier is paired with the memory barrier in + * scif_rb_space(..) + */ + mb(); + ACCESS_ONCE(*rb->read_ptr) = new_offset; +#ifdef CONFIG_INTEL_MIC_CARD + /* + * X100 Si Bug: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. + * This way, if ordering is violated for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. + */ + ACCESS_ONCE(*rb->read_ptr) = new_offset; +#endif +} + +/** + * scif_rb_count + * @rb: ring buffer + * @size: Number of bytes expected to be read + * + * Return: number of bytes that can be read from the RB + */ +u32 scif_rb_count(struct scif_rb *rb, u32 size) +{ + if (scif_rb_ring_cnt(rb->current_write_offset, + rb->current_read_offset, + rb->size) < size) { + rb->current_write_offset = *rb->write_ptr; + /* + * Update from the HW write pointer if empty only once the peer + * has exposed the new message. This read barrier is paired + * with the write barrier in scif_rb_commit(..) + */ + smp_rmb(); + } + return scif_rb_ring_cnt(rb->current_write_offset, + rb->current_read_offset, + rb->size); +} diff --git a/kernel/drivers/misc/mic/scif/scif_rb.h b/kernel/drivers/misc/mic/scif/scif_rb.h new file mode 100644 index 000000000..166dffe30 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rb.h @@ -0,0 +1,100 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + */ +#ifndef SCIF_RB_H +#define SCIF_RB_H +/* + * This file describes a general purpose, byte based ring buffer. Writers to the + * ring buffer need to synchronize using a lock. The same is true for readers, + * although in practice, the ring buffer has a single reader. It is lockless + * between producer and consumer so it can handle being used across the PCIe + * bus. The ring buffer ensures that there are no reads across the PCIe bus for + * performance reasons. Two of these are used to form a single bidirectional + * queue-pair across PCIe. + */ +/* + * struct scif_rb - SCIF Ring Buffer + * + * @rb_base: The base of the memory used for storing RB messages + * @read_ptr: Pointer to the read offset + * @write_ptr: Pointer to the write offset + * @size: Size of the memory in rb_base + * @current_read_offset: Cached read offset for performance + * @current_write_offset: Cached write offset for performance + */ +struct scif_rb { + void *rb_base; + u32 *read_ptr; + u32 *write_ptr; + u32 size; + u32 current_read_offset; + u32 current_write_offset; +}; + +/* methods used by both */ +void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr, + void *rb_base, u8 size); +/* writer only methods */ +/* write a new command, then scif_rb_commit() */ +int scif_rb_write(struct scif_rb *rb, void *msg, u32 size); +/* after write(), then scif_rb_commit() */ +void scif_rb_commit(struct scif_rb *rb); +/* query space available for writing to a RB. */ +u32 scif_rb_space(struct scif_rb *rb); + +/* reader only methods */ +/* read a new message from the ring buffer of size bytes */ +u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size); +/* update the read pointer so that the space can be reused */ +void scif_rb_update_read_ptr(struct scif_rb *rb); +/* count the number of bytes that can be read */ +u32 scif_rb_count(struct scif_rb *rb, u32 size); +#endif diff --git a/kernel/drivers/misc/mic/scif/scif_rma.c b/kernel/drivers/misc/mic/scif/scif_rma.c new file mode 100644 index 000000000..8310b4dbf --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rma.c @@ -0,0 +1,1775 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/dma_remapping.h> +#include <linux/pagemap.h> +#include "scif_main.h" +#include "scif_map.h" + +/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */ +#define SCIF_MAP_ULIMIT 0x40 + +bool scif_ulimit_check = 1; + +/** + * scif_rma_ep_init: + * @ep: end point + * + * Initialize RMA per EP data structures. + */ +void scif_rma_ep_init(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + + mutex_init(&rma->rma_lock); + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, + SCIF_DMA_64BIT_PFN); + spin_lock_init(&rma->tc_lock); + mutex_init(&rma->mmn_lock); + INIT_LIST_HEAD(&rma->reg_list); + INIT_LIST_HEAD(&rma->remote_reg_list); + atomic_set(&rma->tw_refcount, 0); + atomic_set(&rma->tcw_refcount, 0); + atomic_set(&rma->tcw_total_pages, 0); + atomic_set(&rma->fence_refcount, 0); + + rma->async_list_del = 0; + rma->dma_chan = NULL; + INIT_LIST_HEAD(&rma->mmn_list); + INIT_LIST_HEAD(&rma->vma_list); + init_waitqueue_head(&rma->markwq); +} + +/** + * scif_rma_ep_can_uninit: + * @ep: end point + * + * Returns 1 if an endpoint can be uninitialized and 0 otherwise. + */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep) +{ + int ret = 0; + + mutex_lock(&ep->rma_info.rma_lock); + /* Destroy RMA Info only if both lists are empty */ + if (list_empty(&ep->rma_info.reg_list) && + list_empty(&ep->rma_info.remote_reg_list) && + list_empty(&ep->rma_info.mmn_list) && + !atomic_read(&ep->rma_info.tw_refcount) && + !atomic_read(&ep->rma_info.tcw_refcount) && + !atomic_read(&ep->rma_info.fence_refcount)) + ret = 1; + mutex_unlock(&ep->rma_info.rma_lock); + return ret; +} + +/** + * scif_create_pinned_pages: + * @nr_pages: number of pages in window + * @prot: read/write protection + * + * Allocate and prepare a set of pinned pages. + */ +static struct scif_pinned_pages * +scif_create_pinned_pages(int nr_pages, int prot) +{ + struct scif_pinned_pages *pin; + + might_sleep(); + pin = scif_zalloc(sizeof(*pin)); + if (!pin) + goto error; + + pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages)); + if (!pin->pages) + goto error_free_pinned_pages; + + pin->prot = prot; + pin->magic = SCIFEP_MAGIC; + return pin; + +error_free_pinned_pages: + scif_free(pin, sizeof(*pin)); +error: + return NULL; +} + +/** + * scif_destroy_pinned_pages: + * @pin: A set of pinned pages. + * + * Deallocate resources for pinned pages. + */ +static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin) +{ + int j; + int writeable = pin->prot & SCIF_PROT_WRITE; + int kernel = SCIF_MAP_KERNEL & pin->map_flags; + + for (j = 0; j < pin->nr_pages; j++) { + if (pin->pages[j] && !kernel) { + if (writeable) + SetPageDirty(pin->pages[j]); + put_page(pin->pages[j]); + } + } + + scif_free(pin->pages, + pin->nr_pages * sizeof(*pin->pages)); + scif_free(pin, sizeof(*pin)); + return 0; +} + +/* + * scif_create_window: + * @ep: end point + * @nr_pages: number of pages + * @offset: registration offset + * @temp: true if a temporary window is being created + * + * Allocate and prepare a self registration window. + */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_free_window; + + window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_free_window; + + window->offset = offset; + window->ep = (u64)ep; + window->magic = SCIFEP_MAGIC; + window->reg_state = OP_IDLE; + init_waitqueue_head(&window->regwq); + window->unreg_state = OP_IDLE; + init_waitqueue_head(&window->unregwq); + INIT_LIST_HEAD(&window->list); + window->type = SCIF_WINDOW_SELF; + window->temp = temp; + return window; + +error_free_window: + scif_free(window->dma_addr, + nr_pages * sizeof(*window->dma_addr)); + scif_free(window, sizeof(*window)); +error: + return NULL; +} + +/** + * scif_destroy_incomplete_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +static void scif_destroy_incomplete_window(struct scif_endpt *ep, + struct scif_window *window) +{ + int err; + int nr_pages = window->nr_pages; + struct scif_allocmsg *alloc = &window->alloc_handle; + struct scifmsg msg; + +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + + mutex_lock(&ep->rma_info.rma_lock); + if (alloc->state == OP_COMPLETED) { + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + _scif_nodeqp_send(ep->remote_dev, &msg); + } + mutex_unlock(&ep->rma_info.rma_lock); + + scif_free_window_offset(ep, window, window->offset); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + scif_free(window, sizeof(*window)); +} + +/** + * scif_unmap_window: + * @remote_dev: SCIF remote device + * @window: registration window + * + * Delete any DMA mappings created for a registered self window + */ +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int j; + + if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) { + if (window->st) { + dma_unmap_sg(&remote_dev->sdev->dev, + window->st->sgl, window->st->nents, + DMA_BIDIRECTIONAL); + sg_free_table(window->st); + kfree(window->st); + window->st = NULL; + } + } else { + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->dma_addr[j]) { + scif_unmap_single(window->dma_addr[j], + remote_dev, + window->num_pages[j] << + PAGE_SHIFT); + window->dma_addr[j] = 0x0; + } + } + } +} + +static inline struct mm_struct *__scif_acquire_mm(void) +{ + if (scif_ulimit_check) + return get_task_mm(current); + return NULL; +} + +static inline void __scif_release_mm(struct mm_struct *mm) +{ + if (mm) + mmput(mm); +} + +static inline int +__scif_dec_pinned_vm_lock(struct mm_struct *mm, + int nr_pages, bool try_lock) +{ + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + if (try_lock) { + if (!down_write_trylock(&mm->mmap_sem)) { + dev_err(scif_info.mdev.this_device, + "%s %d err\n", __func__, __LINE__); + return -1; + } + } else { + down_write(&mm->mmap_sem); + } + mm->pinned_vm -= nr_pages; + up_write(&mm->mmap_sem); + return 0; +} + +static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm, + int nr_pages) +{ + unsigned long locked, lock_limit; + + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + + locked = nr_pages; + locked += mm->pinned_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + dev_err(scif_info.mdev.this_device, + "locked(%lu) > lock_limit(%lu)\n", + locked, lock_limit); + return -ENOMEM; + } + mm->pinned_vm = locked; + return 0; +} + +/** + * scif_destroy_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window) +{ + int j; + struct scif_pinned_pages *pinned_pages = window->pinned_pages; + int nr_pages = window->nr_pages; + + might_sleep(); + if (!window->temp && window->mm) { + __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0); + __scif_release_mm(window->mm); + window->mm = NULL; + } + + scif_free_window_offset(ep, window, window->offset); + scif_unmap_window(ep->remote_dev, window); + /* + * Decrement references for this set of pinned pages from + * this window. + */ + j = atomic_sub_return(1, &pinned_pages->ref_count); + if (j < 0) + dev_err(scif_info.mdev.this_device, + "%s %d incorrect ref count %d\n", + __func__, __LINE__, j); + /* + * If the ref count for pinned_pages is zero then someone + * has already called scif_unpin_pages() for it and we should + * destroy the page cache. + */ + if (!j) + scif_destroy_pinned_pages(window->pinned_pages); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); + return 0; +} + +/** + * scif_create_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Allocate and prepare lookup entries for the remote + * end to copy over the physical addresses. + * Returns 0 on success and appropriate errno on failure. + */ +static int scif_create_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j, err = 0; + int nr_pages = window->nr_pages; + bool vmalloc_dma_phys, vmalloc_num_pages; + + might_sleep(); + /* Map window */ + err = scif_map_single(&window->mapped_offset, + window, remote_dev, sizeof(*window)); + if (err) + goto error_window; + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE, + ((2) * 1024 * 1024)) >> 21; + + window->dma_addr_lookup.lookup = + scif_alloc_coherent(&window->dma_addr_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->dma_addr_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->dma_addr_lookup.lookup) { + err = -ENOMEM; + goto error_window; + } + + window->num_pages_lookup.lookup = + scif_alloc_coherent(&window->num_pages_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->num_pages_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->num_pages_lookup.lookup) { + err = -ENOMEM; + goto error_window; + } + + vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]); + vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]); + + /* Now map each of the pages containing physical addresses */ + for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) { + err = scif_map_page(&window->dma_addr_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->dma_addr[i]) : + virt_to_page(&window->dma_addr[i]), + remote_dev); + if (err) + goto error_window; + err = scif_map_page(&window->num_pages_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->num_pages[i]) : + virt_to_page(&window->num_pages[i]), + remote_dev); + if (err) + goto error_window; + } + return 0; +error_window: + return err; +} + +/** + * scif_destroy_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Destroy lookup entries used for the remote + * end to copy over the physical addresses. + */ +static void scif_destroy_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j; + + if (window->nr_lookup) { + struct scif_rma_lookup *lup = &window->dma_addr_lookup; + struct scif_rma_lookup *npup = &window->num_pages_lookup; + + for (i = 0, j = 0; i < window->nr_pages; + i += SCIF_NR_ADDR_IN_PAGE, j++) { + if (lup->lookup && lup->lookup[j]) + scif_unmap_single(lup->lookup[j], + remote_dev, + PAGE_SIZE); + if (npup->lookup && npup->lookup[j]) + scif_unmap_single(npup->lookup[j], + remote_dev, + PAGE_SIZE); + } + if (lup->lookup) + scif_free_coherent(lup->lookup, lup->offset, + remote_dev, window->nr_lookup * + sizeof(*lup->lookup)); + if (npup->lookup) + scif_free_coherent(npup->lookup, npup->offset, + remote_dev, window->nr_lookup * + sizeof(*npup->lookup)); + if (window->mapped_offset) + scif_unmap_single(window->mapped_offset, + remote_dev, sizeof(*window)); + window->nr_lookup = 0; + } +} + +/** + * scif_create_remote_window: + * @ep: end point + * @nr_pages: number of pages in window + * + * Allocate and prepare a remote registration window. + */ +static struct scif_window * +scif_create_remote_window(struct scif_dev *scifdev, int nr_pages) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error_ret; + + window->magic = SCIFEP_MAGIC; + window->nr_pages = nr_pages; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_window; + + window->num_pages = scif_zalloc(nr_pages * + sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_window; + + if (scif_create_remote_lookup(scifdev, window)) + goto error_window; + + window->type = SCIF_WINDOW_PEER; + window->unreg_state = OP_IDLE; + INIT_LIST_HEAD(&window->list); + return window; +error_window: + scif_destroy_remote_window(window); +error_ret: + return NULL; +} + +/** + * scif_destroy_remote_window: + * @ep: end point + * @window: remote registration window + * + * Deallocate resources for remote window. + */ +void +scif_destroy_remote_window(struct scif_window *window) +{ + scif_free(window->dma_addr, window->nr_pages * + sizeof(*window->dma_addr)); + scif_free(window->num_pages, window->nr_pages * + sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); +} + +/** + * scif_iommu_map: create DMA mappings if the IOMMU is enabled + * @remote_dev: SCIF remote device + * @window: remote registration window + * + * Map the physical pages using dma_map_sg(..) and then detect the number + * of contiguous DMA mappings allocated + */ +static int scif_iommu_map(struct scif_dev *remote_dev, + struct scif_window *window) +{ + struct scatterlist *sg; + int i, err; + scif_pinned_pages_t pin = window->pinned_pages; + + window->st = kzalloc(sizeof(*window->st), GFP_KERNEL); + if (!window->st) + return -ENOMEM; + + err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL); + if (err) + return err; + + for_each_sg(window->st->sgl, sg, window->st->nents, i) + sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0); + + err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl, + window->st->nents, DMA_BIDIRECTIONAL); + if (!err) + return -ENOMEM; + /* Detect contiguous ranges of DMA mappings */ + sg = window->st->sgl; + for (i = 0; sg; i++) { + dma_addr_t last_da; + + window->dma_addr[i] = sg_dma_address(sg); + window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT; + last_da = sg_dma_address(sg) + sg_dma_len(sg); + while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) { + window->num_pages[i] += + (sg_dma_len(sg) >> PAGE_SHIFT); + last_da = window->dma_addr[i] + + sg_dma_len(sg); + } + window->nr_contig_chunks++; + } + return 0; +} + +/** + * scif_map_window: + * @remote_dev: SCIF remote device + * @window: self registration window + * + * Map pages of a window into the aperture/PCI. + * Also determine addresses required for DMA. + */ +int +scif_map_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int i, j, k, err = 0, nr_contig_pages; + scif_pinned_pages_t pin; + phys_addr_t phys_prev, phys_curr; + + might_sleep(); + + pin = window->pinned_pages; + + if (intel_iommu_enabled && !scifdev_self(remote_dev)) + return scif_iommu_map(remote_dev, window); + + for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) { + phys_prev = page_to_phys(pin->pages[i]); + nr_contig_pages = 1; + + /* Detect physically contiguous chunks */ + for (k = i + 1; k < window->nr_pages; k++) { + phys_curr = page_to_phys(pin->pages[k]); + if (phys_curr != (phys_prev + PAGE_SIZE)) + break; + phys_prev = phys_curr; + nr_contig_pages++; + } + window->num_pages[j] = nr_contig_pages; + window->nr_contig_chunks++; + if (scif_is_mgmt_node()) { + /* + * Management node has to deal with SMPT on X100 and + * hence the DMA mapping is required + */ + err = scif_map_single(&window->dma_addr[j], + phys_to_virt(page_to_phys( + pin->pages[i])), + remote_dev, + nr_contig_pages << PAGE_SHIFT); + if (err) + return err; + } else { + window->dma_addr[j] = page_to_phys(pin->pages[i]); + } + } + return err; +} + +/** + * scif_send_scif_unregister: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_UNREGISTER message. + */ +static int scif_send_scif_unregister(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + + msg.uop = SCIF_UNREGISTER; + msg.src = ep->port; + msg.payload[0] = window->alloc_handle.vaddr; + msg.payload[1] = (u64)window; + return scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_unregister_window: + * @window: self registration window + * + * Send an unregistration request and wait for a response. + */ +int scif_unregister_window(struct scif_window *window) +{ + int err = 0; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + bool send_msg = false; + + might_sleep(); + switch (window->unreg_state) { + case OP_IDLE: + { + window->unreg_state = OP_IN_PROGRESS; + send_msg = true; + /* fall through */ + } + case OP_IN_PROGRESS: + { + scif_get_window(window, 1); + mutex_unlock(&ep->rma_info.rma_lock); + if (send_msg) { + err = scif_send_scif_unregister(ep, window); + if (err) { + window->unreg_state = OP_COMPLETED; + goto done; + } + } else { + /* Return ENXIO since unregistration is in progress */ + mutex_lock(&ep->rma_info.rma_lock); + return -ENXIO; + } +retry: + /* Wait for a SCIF_UNREGISTER_(N)ACK message */ + err = wait_event_timeout(window->unregwq, + window->unreg_state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + } + if (err > 0) + err = 0; +done: + mutex_lock(&ep->rma_info.rma_lock); + scif_put_window(window, 1); + break; + } + case OP_FAILED: + { + if (!scifdev_alive(ep)) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + } + break; + } + case OP_COMPLETED: + break; + default: + err = -ENODEV; + } + + if (window->unreg_state == OP_COMPLETED && window->ref_count) + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + mutex_unlock(&ep->rma_info.rma_lock); + if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) && + scifdev_alive(ep)) { + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + } else { + if (!__scif_dec_pinned_vm_lock(window->mm, + window->nr_pages, 1)) { + __scif_release_mm(window->mm); + window->mm = NULL; + } + } + scif_queue_for_cleanup(window, &scif_info.rma); + mutex_lock(&ep->rma_info.rma_lock); + } + return err; +} + +/** + * scif_send_alloc_request: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request + */ +static int scif_send_alloc_request(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_allocmsg *alloc = &window->alloc_handle; + + /* Set up the Alloc Handle */ + alloc->state = OP_IN_PROGRESS; + init_waitqueue_head(&alloc->allocwq); + + /* Send out an allocation request */ + msg.uop = SCIF_ALLOC_REQ; + msg.payload[1] = window->nr_pages; + msg.payload[2] = (u64)&window->alloc_handle; + return _scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_prep_remote_window: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request, wait for an allocation response, + * and prepares the remote window by copying over the page lists + */ +static int scif_prep_remote_window(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_window *remote_window; + struct scif_allocmsg *alloc = &window->alloc_handle; + dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1; + int i = 0, j = 0; + int nr_contig_chunks, loop_nr_contig_chunks; + int remaining_nr_contig_chunks, nr_lookup; + int err, map_err; + + map_err = scif_map_window(ep->remote_dev, window); + if (map_err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d map_err %d\n", __func__, __LINE__, map_err); + remaining_nr_contig_chunks = window->nr_contig_chunks; + nr_contig_chunks = window->nr_contig_chunks; +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + mutex_lock(&ep->rma_info.rma_lock); + /* Synchronize with the thread waking up allocwq */ + mutex_unlock(&ep->rma_info.rma_lock); + if (!err && scifdev_alive(ep)) + goto retry; + + if (!err) + err = -ENODEV; + + if (err > 0) + err = 0; + else + return err; + + /* Bail out. The remote end rejected this request */ + if (alloc->state == OP_FAILED) + return -ENOMEM; + + if (map_err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, map_err); + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = _scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + return err; + } + + remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window), + ep->remote_dev); + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE) + >> ilog2(SCIF_NR_ADDR_IN_PAGE); + + dma_phys_lookup = + scif_ioremap(remote_window->dma_addr_lookup.offset, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + num_pages_lookup = + scif_ioremap(remote_window->num_pages_lookup.offset, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + + while (remaining_nr_contig_chunks) { + loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks, + (int)SCIF_NR_ADDR_IN_PAGE); + /* #1/2 - Copy physical addresses over to the remote side */ + + /* #2/2 - Copy DMA addresses (addresses that are fed into the + * DMA engine) We transfer bus addresses which are then + * converted into a MIC physical address on the remote + * side if it is a MIC, if the remote node is a mgmt node we + * transfer the MIC physical address + */ + tmp = scif_ioremap(dma_phys_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->dma_addr), + ep->remote_dev); + tmp1 = scif_ioremap(num_pages_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->num_pages), + ep->remote_dev); + if (scif_is_mgmt_node()) { + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], loop_nr_contig_chunks + * sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + if (scifdev_is_p2p(ep->remote_dev)) { + /* + * add remote node's base address for this node + * to convert it into a MIC address + */ + int m; + dma_addr_t dma_addr; + + for (m = 0; m < loop_nr_contig_chunks; m++) { + dma_addr = window->dma_addr[i + m] + + ep->remote_dev->base_addr; + writeq(dma_addr, + (void __force __iomem *)&tmp[m]); + } + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + /* Mgmt node or loopback - transfer DMA + * addresses as is, this is the same as a + * MIC physical address (we use the dma_addr + * and not the phys_addr array since the + * phys_addr is only setup if there is a mmap() + * request from the mgmt node) + */ + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], + loop_nr_contig_chunks * + sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks * + sizeof(*window->num_pages)); + } + } + remaining_nr_contig_chunks -= loop_nr_contig_chunks; + i += loop_nr_contig_chunks; + j++; + scif_iounmap(tmp, loop_nr_contig_chunks * + sizeof(*window->dma_addr), ep->remote_dev); + scif_iounmap(tmp1, loop_nr_contig_chunks * + sizeof(*window->num_pages), ep->remote_dev); + } + + /* Prepare the remote window for the peer */ + remote_window->peer_window = (u64)window; + remote_window->offset = window->offset; + remote_window->prot = window->prot; + remote_window->nr_contig_chunks = nr_contig_chunks; + remote_window->ep = ep->remote_ep; + scif_iounmap(num_pages_lookup, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + scif_iounmap(dma_phys_lookup, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev); + window->peer_window = alloc->vaddr; + return err; +} + +/** + * scif_send_scif_register: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_REGISTER message if EP is connected and wait for a + * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT + * message so that the peer can free its remote window allocated earlier. + */ +static int scif_send_scif_register(struct scif_endpt *ep, + struct scif_window *window) +{ + int err = 0; + struct scifmsg msg; + + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg.uop = SCIF_REGISTER; + window->reg_state = OP_IN_PROGRESS; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) { +retry: + /* Wait for a SCIF_REGISTER_(N)ACK message */ + err = wait_event_timeout(window->regwq, + window->reg_state != + OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + err = !err ? -ENODEV : 0; + if (window->reg_state == OP_FAILED) + err = -ENOTCONN; + } + } else { + msg.uop = SCIF_FREE_VIRT; + msg.payload[3] = SCIF_REGISTER; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) + err = -ENOTCONN; + } + return err; +} + +/** + * scif_get_window_offset: + * @ep: end point descriptor + * @flags: flags + * @offset: offset hint + * @num_pages: number of pages + * @out_offset: computed offset returned by reference. + * + * Compute/Claim a new offset for this EP. + */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset, + int num_pages, s64 *out_offset) +{ + s64 page_index; + struct iova *iova_ptr; + int err = 0; + + if (flags & SCIF_MAP_FIXED) { + page_index = SCIF_IOVA_PFN(offset); + iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index, + page_index + num_pages - 1); + if (!iova_ptr) + err = -EADDRINUSE; + } else { + iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages, + SCIF_DMA_63BIT_PFN - 1, 0); + if (!iova_ptr) + err = -ENOMEM; + } + if (!err) + *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT; + return err; +} + +/** + * scif_free_window_offset: + * @ep: end point descriptor + * @window: registration window + * @offset: Offset to be freed + * + * Free offset for this EP. The callee is supposed to grab + * the RMA mutex before calling this API. + */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset) +{ + if ((window && !window->offset_freed) || !window) { + free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT); + if (window) + window->offset_freed = true; + } +} + +/** + * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message + * @msg: Interrupt message + * + * Remote side is requesting a memory allocation. + */ +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int err; + struct scif_window *window = NULL; + int nr_pages = msg->payload[1]; + + window = scif_create_remote_window(scifdev, nr_pages); + if (!window) { + err = -ENOMEM; + goto error; + } + + /* The peer's allocation request is granted */ + msg->uop = SCIF_ALLOC_GNT; + msg->payload[0] = (u64)window; + msg->payload[1] = window->mapped_offset; + err = scif_nodeqp_send(scifdev, msg); + if (err) + scif_destroy_remote_window(window); + return; +error: + /* The peer's allocation request is rejected */ + dev_err(&scifdev->sdev->dev, + "%s %d error %d alloc_ptr %p nr_pages 0x%x\n", + __func__, __LINE__, err, window, nr_pages); + msg->uop = SCIF_ALLOC_REJ; + scif_nodeqp_send(scifdev, msg); +} + +/** + * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message + * @msg: Interrupt message + * + * Remote side responded to a memory allocation. + */ +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2]; + struct scif_window *window = container_of(handle, struct scif_window, + alloc_handle); + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + handle->vaddr = msg->payload[0]; + handle->phys_addr = msg->payload[1]; + if (msg->uop == SCIF_ALLOC_GNT) + handle->state = OP_COMPLETED; + else + handle->state = OP_FAILED; + wake_up(&handle->allocwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message + * @msg: Interrupt message + * + * Free up memory kmalloc'd earlier. + */ +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = (struct scif_window *)msg->payload[1]; + + scif_destroy_remote_window(window); +} + +static void +scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window) +{ + int j; + struct scif_hw_dev *sdev = dev->sdev; + phys_addr_t apt_base = 0; + + /* + * Add the aperture base if the DMA address is not card relative + * since the DMA addresses need to be an offset into the bar + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + sdev->aper && !sdev->card_rel_da) + apt_base = sdev->aper->pa; + else + return; + + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->num_pages[j]) + window->dma_addr[j] += apt_base; + else + break; + } +} + +/** + * scif_recv_reg: Respond to SCIF_REGISTER interrupt message + * @msg: Interrupt message + * + * Update remote window list with a new registered window. + */ +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg->uop = SCIF_REGISTER_ACK; + scif_nodeqp_send(ep->remote_dev, msg); + scif_fixup_aper_base(ep->remote_dev, window); + /* No further failures expected. Insert new window */ + scif_insert_window(window, &ep->rma_info.remote_reg_list); + } else { + msg->uop = SCIF_REGISTER_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + } + spin_unlock(&ep->lock); + mutex_unlock(&ep->rma_info.rma_lock); + /* free up any lookup resources now that page lists are transferred */ + scif_destroy_remote_lookup(ep->remote_dev, window); + /* + * We could not insert the window but we need to + * destroy the window. + */ + if (msg->uop == SCIF_REGISTER_NACK) + scif_destroy_remote_window(window); +} + +/** + * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message + * @msg: Interrupt message + * + * Remove window from remote registration list; + */ +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + int del_window = 0; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = 0; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.remote_reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + if (window) { + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(&scifdev->sdev->dev, + "%s %d ref count should be +ve\n", + __func__, __LINE__); + window->unreg_state = OP_COMPLETED; + if (!window->ref_count) { + msg->uop = SCIF_UNREGISTER_ACK; + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + del_window = 1; + } else { + /* NACK! There are valid references to this window */ + msg->uop = SCIF_UNREGISTER_NACK; + } + } else { + /* The window did not make its way to the list at all. ACK */ + msg->uop = SCIF_UNREGISTER_ACK; + scif_destroy_remote_window(recv_window); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (del_window) + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + scif_nodeqp_send(ep->remote_dev, msg); + if (del_window) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/** + * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete registration. + */ +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_COMPLETED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that registration + * cannot be completed. + */ +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_FAILED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete unregistration. + */ +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_COMPLETED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that unregistration + * cannot be completed immediately. + */ +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_FAILED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages) +{ + struct scif_pinned_pages *pinned_pages; + int nr_pages, err = 0, i; + bool vmalloc_addr = false; + bool try_upgrade = false; + int prot = *out_prot; + int ulimit = 0; + struct mm_struct *mm = NULL; + + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT)) + return -EINVAL; + ulimit = !!(map_flags & SCIF_MAP_ULIMIT); + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + might_sleep(); + + nr_pages = len >> PAGE_SHIFT; + + /* Allocate a set of pinned pages */ + pinned_pages = scif_create_pinned_pages(nr_pages, prot); + if (!pinned_pages) + return -ENOMEM; + + if (map_flags & SCIF_MAP_KERNEL) { + if (is_vmalloc_addr(addr)) + vmalloc_addr = true; + + for (i = 0; i < nr_pages; i++) { + if (vmalloc_addr) + pinned_pages->pages[i] = + vmalloc_to_page(addr + (i * PAGE_SIZE)); + else + pinned_pages->pages[i] = + virt_to_page(addr + (i * PAGE_SIZE)); + } + pinned_pages->nr_pages = nr_pages; + pinned_pages->map_flags = SCIF_MAP_KERNEL; + } else { + /* + * SCIF supports registration caching. If a registration has + * been requested with read only permissions, then we try + * to pin the pages with RW permissions so that a subsequent + * transfer with RW permission can hit the cache instead of + * invalidating it. If the upgrade fails with RW then we + * revert back to R permission and retry + */ + if (prot == SCIF_PROT_READ) + try_upgrade = true; + prot |= SCIF_PROT_WRITE; +retry: + mm = current->mm; + down_write(&mm->mmap_sem); + if (ulimit) { + err = __scif_check_inc_pinned_vm(mm, nr_pages); + if (err) { + up_write(&mm->mmap_sem); + pinned_pages->nr_pages = 0; + goto error_unmap; + } + } + + pinned_pages->nr_pages = get_user_pages( + current, + mm, + (u64)addr, + nr_pages, + !!(prot & SCIF_PROT_WRITE), + 0, + pinned_pages->pages, + NULL); + up_write(&mm->mmap_sem); + if (nr_pages != pinned_pages->nr_pages) { + if (try_upgrade) { + if (ulimit) + __scif_dec_pinned_vm_lock(mm, + nr_pages, 0); + /* Roll back any pinned pages */ + for (i = 0; i < pinned_pages->nr_pages; i++) { + if (pinned_pages->pages[i]) + put_page( + pinned_pages->pages[i]); + } + prot &= ~SCIF_PROT_WRITE; + try_upgrade = false; + goto retry; + } + } + pinned_pages->map_flags = 0; + } + + if (pinned_pages->nr_pages < nr_pages) { + err = -EFAULT; + pinned_pages->nr_pages = nr_pages; + goto dec_pinned; + } + + *out_prot = prot; + atomic_set(&pinned_pages->ref_count, 1); + *pages = pinned_pages; + return err; +dec_pinned: + if (ulimit) + __scif_dec_pinned_vm_lock(mm, nr_pages, 0); + /* Something went wrong! Rollback */ +error_unmap: + pinned_pages->nr_pages = nr_pages; + scif_destroy_pinned_pages(pinned_pages); + *pages = NULL; + dev_dbg(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); + return err; +} + +int scif_pin_pages(void *addr, size_t len, int prot, + int map_flags, scif_pinned_pages_t *pages) +{ + return __scif_pin_pages(addr, len, &prot, map_flags, pages); +} +EXPORT_SYMBOL_GPL(scif_pin_pages); + +int scif_unpin_pages(scif_pinned_pages_t pinned_pages) +{ + int err = 0, ret; + + if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic) + return -EINVAL; + + ret = atomic_sub_return(1, &pinned_pages->ref_count); + if (ret < 0) { + dev_err(scif_info.mdev.this_device, + "%s %d scif_unpin_pages called without pinning? rc %d\n", + __func__, __LINE__, ret); + return -EINVAL; + } + /* + * Destroy the window if the ref count for this set of pinned + * pages has dropped to zero. If it is positive then there is + * a valid registered window which is backed by these pages and + * it will be destroyed once all such windows are unregistered. + */ + if (!ret) + err = scif_destroy_pinned_pages(pinned_pages); + + return err; +} +EXPORT_SYMBOL_GPL(scif_unpin_pages); + +static inline void +scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep) +{ + mutex_lock(&ep->rma_info.rma_lock); + scif_insert_window(window, &ep->rma_info.reg_list); + mutex_unlock(&ep->rma_info.rma_lock); +} + +off_t scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, int map_flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + int err; + size_t len; + struct device *spdev; + + /* Unsupported flags */ + if (map_flags & ~SCIF_MAP_FIXED) + return -EINVAL; + + len = pinned_pages->nr_pages << PAGE_SHIFT; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + /* + * It is an error to pass pinned_pages to scif_register_pinned_pages() + * after calling scif_unpin_pages(). + */ + if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0)) + return -EINVAL; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len, &computed_offset); + if (err) { + atomic_sub(1, &pinned_pages->ref_count); + return err; + } + + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, pinned_pages->nr_pages, + computed_offset, false); + if (!window) { + atomic_sub(1, &pinned_pages->ref_count); + scif_free_window_offset(ep, NULL, computed_offset); + return -ENOMEM; + } + + window->pinned_pages = pinned_pages; + window->nr_pages = pinned_pages->nr_pages; + window->prot = pinned_pages->prot; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_destroy_window(ep, window); + return err; + } + err = scif_send_alloc_request(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register_pinned_pages); + +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot, int map_flags) +{ + scif_pinned_pages_t pinned_pages; + off_t err; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + struct mm_struct *mm = NULL; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n", + epd, addr, len, offset, prot, map_flags); + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL)) + return -EINVAL; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len >> PAGE_SHIFT, &computed_offset); + if (err) + return err; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_free_window_offset(ep, NULL, computed_offset); + return err; + } + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, len >> PAGE_SHIFT, + computed_offset, false); + if (!window) { + scif_free_window_offset(ep, NULL, computed_offset); + scif_put_peer_dev(spdev); + return -ENOMEM; + } + + window->nr_pages = len >> PAGE_SHIFT; + + err = scif_send_alloc_request(ep, window); + if (err) { + scif_destroy_incomplete_window(ep, window); + scif_put_peer_dev(spdev); + return err; + } + + if (!(map_flags & SCIF_MAP_KERNEL)) { + mm = __scif_acquire_mm(); + map_flags |= SCIF_MAP_ULIMIT; + } + /* Pin down the pages */ + err = __scif_pin_pages(addr, len, &prot, + map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT), + &pinned_pages); + if (err) { + scif_destroy_incomplete_window(ep, window); + __scif_release_mm(mm); + goto error; + } + + window->pinned_pages = pinned_pages; + window->prot = pinned_pages->prot; + window->mm = mm; + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n", + epd, addr, len, computed_offset); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); +error: + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register); + +int +scif_unregister(scif_epd_t epd, off_t offset, size_t len) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + int nr_pages, err; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + /* len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + /* Offset is not page aligned or offset+len wraps around */ + if ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset + (off_t)len < offset)) + return -EINVAL; + + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + /* Unregister all the windows in this range */ + err = scif_rma_list_unregister(window, offset, nr_pages); + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); +error: + mutex_unlock(&ep->rma_info.rma_lock); + scif_put_peer_dev(spdev); + return err; +} +EXPORT_SYMBOL_GPL(scif_unregister); diff --git a/kernel/drivers/misc/mic/scif/scif_rma.h b/kernel/drivers/misc/mic/scif/scif_rma.h new file mode 100644 index 000000000..fa6722279 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rma.h @@ -0,0 +1,464 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_H +#define SCIF_RMA_H + +#include <linux/dma_remapping.h> +#include <linux/mmu_notifier.h> + +#include "../bus/scif_bus.h" + +/* If this bit is set then the mark is a remote fence mark */ +#define SCIF_REMOTE_FENCE_BIT 31 +/* Magic value used to indicate a remote fence request */ +#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT) + +#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL) +#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \ + (L1_CACHE_BYTES << 1)) + +#define SCIF_IOVA_START_PFN (1) +#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64)) +#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63)) + +/* + * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information + * + * @reg_list: List of registration windows for self + * @remote_reg_list: List of registration windows for peer + * @iovad: Offset generator + * @rma_lock: Synchronizes access to self/remote list and also protects the + * window from being destroyed while RMAs are in progress. + * @tc_lock: Synchronizes access to temporary cached windows list + * for SCIF Registration Caching. + * @mmn_lock: Synchronizes access to the list of MMU notifiers registered + * @tw_refcount: Keeps track of number of outstanding temporary registered + * windows created by scif_vreadfrom/scif_vwriteto which have + * not been destroyed. + * @tcw_refcount: Same as tw_refcount but for temporary cached windows + * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned + * @mmn_list: MMU notifier so that we can destroy the windows when required + * @fence_refcount: Keeps track of number of outstanding remote fence + * requests which have been received by the peer. + * @dma_chan: DMA channel used for all DMA transfers for this endpoint. + * @async_list_del: Detect asynchronous list entry deletion + * @vma_list: List of vmas with remote memory mappings + * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait +*/ +struct scif_endpt_rma_info { + struct list_head reg_list; + struct list_head remote_reg_list; + struct iova_domain iovad; + struct mutex rma_lock; + spinlock_t tc_lock; + struct mutex mmn_lock; + atomic_t tw_refcount; + atomic_t tcw_refcount; + atomic_t tcw_total_pages; + struct list_head mmn_list; + atomic_t fence_refcount; + struct dma_chan *dma_chan; + int async_list_del; + struct list_head vma_list; + wait_queue_head_t markwq; +}; + +/* + * struct scif_fence_info - used for tracking fence requests + * + * @state: State of this transfer + * @wq: Fences wait on this queue + * @dma_mark: Used for storing the DMA mark + */ +struct scif_fence_info { + enum scif_msg_state state; + struct completion comp; + int dma_mark; +}; + +/* + * struct scif_remote_fence_info - used for tracking remote fence requests + * + * @msg: List of SCIF node QP fence messages + * @list: Link to list of remote fence requests + */ +struct scif_remote_fence_info { + struct scifmsg msg; + struct list_head list; +}; + +/* + * Specifies whether an RMA operation can span across partial windows, a single + * window or multiple contiguous windows. Mmaps can span across partial windows. + * Unregistration can span across complete windows. scif_get_pages() can span a + * single window. A window can also be of type self or peer. + */ +enum scif_window_type { + SCIF_WINDOW_PARTIAL, + SCIF_WINDOW_SINGLE, + SCIF_WINDOW_FULL, + SCIF_WINDOW_SELF, + SCIF_WINDOW_PEER +}; + +/* The number of physical addresses that can be stored in a PAGE. */ +#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3) + +/* + * struct scif_rma_lookup - RMA lookup data structure for page list transfers + * + * Store an array of lookup offsets. Each offset in this array maps + * one 4K page containing 512 physical addresses i.e. 2MB. 512 such + * offsets in a 4K page will correspond to 1GB of registered address space. + + * @lookup: Array of offsets + * @offset: DMA offset of lookup array + */ +struct scif_rma_lookup { + dma_addr_t *lookup; + dma_addr_t offset; +}; + +/* + * struct scif_pinned_pages - A set of pinned pages obtained with + * scif_pin_pages() which could be part of multiple registered + * windows across different end points. + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @prot: read/write protections + * @map_flags: Flags specified during the pin operation + * @ref_count: Reference count bumped in terms of number of pages + * @magic: A magic value + * @pages: Array of pointers to struct pages populated with get_user_pages(..) + */ +struct scif_pinned_pages { + s64 nr_pages; + int prot; + int map_flags; + atomic_t ref_count; + u64 magic; + struct page **pages; +}; + +/* + * struct scif_status - Stores DMA status update information + * + * @src_dma_addr: Source buffer DMA address + * @val: src location for value to be written to the destination + * @ep: SCIF endpoint + */ +struct scif_status { + dma_addr_t src_dma_addr; + u64 val; + struct scif_endpt *ep; +}; + +/* + * struct scif_window - Registration Window for Self and Remote + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @nr_contig_chunks: Number of contiguous physical chunks + * @prot: read/write protections + * @ref_count: reference count in terms of number of pages + * @magic: Cookie to detect corruption + * @offset: registered offset + * @va_for_temp: va address that this window represents + * @dma_mark: Used to determine if all DMAs against the window are done + * @ep: Pointer to EP. Useful for passing EP around with messages to + avoid expensive list traversals. + * @list: link to list of windows for the endpoint + * @type: self or peer window + * @peer_window: Pointer to peer window. Useful for sending messages to peer + * without requiring an extra list traversal + * @unreg_state: unregistration state + * @offset_freed: True if the offset has been freed + * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto + * @mm: memory descriptor for the task_struct which initiated the RMA + * @st: scatter gather table for DMA mappings with IOMMU enabled + * @pinned_pages: The set of pinned_pages backing this window + * @alloc_handle: Handle for sending ALLOC_REQ + * @regwq: Wait Queue for an registration (N)ACK + * @reg_state: Registration state + * @unregwq: Wait Queue for an unregistration (N)ACK + * @dma_addr_lookup: Lookup for physical addresses used for DMA + * @nr_lookup: Number of entries in lookup + * @mapped_offset: Offset used to map the window by the peer + * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA + * @num_pages: Array specifying number of pages for each physical address + */ +struct scif_window { + s64 nr_pages; + int nr_contig_chunks; + int prot; + int ref_count; + u64 magic; + s64 offset; + unsigned long va_for_temp; + int dma_mark; + u64 ep; + struct list_head list; + enum scif_window_type type; + u64 peer_window; + enum scif_msg_state unreg_state; + bool offset_freed; + bool temp; + struct mm_struct *mm; + struct sg_table *st; + union { + struct { + struct scif_pinned_pages *pinned_pages; + struct scif_allocmsg alloc_handle; + wait_queue_head_t regwq; + enum scif_msg_state reg_state; + wait_queue_head_t unregwq; + }; + struct { + struct scif_rma_lookup dma_addr_lookup; + struct scif_rma_lookup num_pages_lookup; + int nr_lookup; + dma_addr_t mapped_offset; + }; + }; + dma_addr_t *dma_addr; + u64 *num_pages; +} __packed; + +/* + * scif_mmu_notif - SCIF mmu notifier information + * + * @mmu_notifier ep_mmu_notifier: MMU notifier operations + * @tc_reg_list: List of temp registration windows for self + * @mm: memory descriptor for the task_struct which initiated the RMA + * @ep: SCIF endpoint + * @list: link to list of MMU notifier information + */ +struct scif_mmu_notif { +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier ep_mmu_notifier; +#endif + struct list_head tc_reg_list; + struct mm_struct *mm; + struct scif_endpt *ep; + struct list_head list; +}; + +enum scif_rma_dir { + SCIF_LOCAL_TO_REMOTE, + SCIF_REMOTE_TO_LOCAL +}; + +extern struct kmem_cache *unaligned_cache; +/* Initialize RMA for this EP */ +void scif_rma_ep_init(struct scif_endpt *ep); +/* Check if epd can be uninitialized */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep); +/* Obtain a new offset. Callee must grab RMA lock */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, + s64 offset, int nr_pages, s64 *out_offset); +/* Free offset. Callee must grab RMA lock */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset); +/* Create self registration window */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp); +/* Destroy self registration window.*/ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window); +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window); +/* Map pages of self window to Aperture/PCI */ +int scif_map_window(struct scif_dev *remote_dev, + struct scif_window *window); +/* Unregister a self window */ +int scif_unregister_window(struct scif_window *window); +/* Destroy remote registration window */ +void +scif_destroy_remote_window(struct scif_window *window); +/* remove valid remote memory mappings from process address space */ +void scif_zap_mmaps(int node); +/* Query if any applications have remote memory mappings */ +bool scif_rma_do_apps_have_mmaps(int node); +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node); +/* Reserve a DMA channel for a particular endpoint */ +int scif_reserve_dma_chan(struct scif_endpt *ep); +/* Setup a DMA mark for an endpoint */ +int _scif_fence_mark(scif_epd_t epd, int *mark); +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type); +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_mmu_notif_handler(struct work_struct *work); +void scif_rma_handle_remote_fences(void); +void scif_rma_destroy_windows(void); +void scif_rma_destroy_tcw_invalid(void); +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan); + +struct scif_window_iter { + s64 offset; + int index; +}; + +static inline void +scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter) +{ + iter->offset = window->offset; + iter->index = 0; +} + +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, + struct scif_window_iter *iter); +static inline +dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off) +{ + return scif_off_to_dma_addr(window, off, NULL, NULL); +} + +static inline bool scif_unaligned(off_t src_offset, off_t dst_offset) +{ + src_offset = src_offset & (L1_CACHE_BYTES - 1); + dst_offset = dst_offset & (L1_CACHE_BYTES - 1); + return !(src_offset == dst_offset); +} + +/* + * scif_zalloc: + * @size: Size of the allocation request. + * + * Helper API which attempts to allocate zeroed pages via + * __get_free_pages(..) first and then falls back on + * vzalloc(..) if that fails. + */ +static inline void *scif_zalloc(size_t size) +{ + void *ret = NULL; + size_t align = ALIGN(size, PAGE_SIZE); + + if (align && get_order(align) < MAX_ORDER) + ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(align)); + return ret ? ret : vzalloc(align); +} + +/* + * scif_free: + * @addr: Address to be freed. + * @size: Size of the allocation. + * Helper API which frees memory allocated via scif_zalloc(). + */ +static inline void scif_free(void *addr, size_t size) +{ + size_t align = ALIGN(size, PAGE_SIZE); + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + free_pages((unsigned long)addr, get_order(align)); +} + +static inline void scif_get_window(struct scif_window *window, int nr_pages) +{ + window->ref_count += nr_pages; +} + +static inline void scif_put_window(struct scif_window *window, int nr_pages) +{ + window->ref_count -= nr_pages; +} + +static inline void scif_set_window_ref(struct scif_window *window, int nr_pages) +{ + window->ref_count = nr_pages; +} + +static inline void +scif_queue_for_cleanup(struct scif_window *window, struct list_head *list) +{ + spin_lock(&scif_info.rmalock); + list_add_tail(&window->list, list); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.misc_work); +} + +static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window) +{ + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma_tc); +} + +static inline bool scif_is_iommu_enabled(void) +{ +#ifdef CONFIG_INTEL_IOMMU + return intel_iommu_enabled; +#else + return false; +#endif +} +#endif /* SCIF_RMA_H */ diff --git a/kernel/drivers/misc/mic/scif/scif_rma_list.c b/kernel/drivers/misc/mic/scif/scif_rma_list.c new file mode 100644 index 000000000..e1ef8daed --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rma_list.c @@ -0,0 +1,291 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include <linux/mmu_notifier.h> +#include <linux/highmem.h> + +/* + * scif_insert_tcw: + * + * Insert a temp window to the temp registration list sorted by va_for_temp. + * RMA lock must be held. + */ +void scif_insert_tcw(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL; + struct scif_window *prev = list_entry(head, struct scif_window, list); + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + /* Compare with tail and if the entry is new tail add it to the end */ + if (!list_empty(head)) { + curr = list_entry(head->prev, struct scif_window, list); + if (curr->va_for_temp < window->va_for_temp) { + list_add_tail(&window->list, head); + return; + } + } + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->va_for_temp > window->va_for_temp) + break; + prev = curr; + } + list_add(&window->list, &prev->list); +} + +/* + * scif_insert_window: + * + * Insert a window to the self registration list sorted by offset. + * RMA lock must be held. + */ +void scif_insert_window(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL, *prev = NULL; + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->offset > window->offset) + break; + prev = curr; + } + if (!prev) + list_add(&window->list, head); + else + list_add(&window->list, &prev->list); + scif_set_window_ref(window, window->nr_pages); +} + +/* + * scif_query_tcw: + * + * Query the temp cached registration list of ep for an overlapping window + * in case of permission mismatch, destroy the previous window. if permissions + * match and overlap is partial, destroy the window but return the new range + * RMA lock must be held. + */ +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req) +{ + struct list_head *item, *temp, *head = req->head; + struct scif_window *window; + u64 start_va_window, start_va_req = req->va_for_temp; + u64 end_va_window, end_va_req = start_va_req + req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + /* + * Avoid traversing the entire list to find out that there + * is no entry that matches + */ + if (!list_empty(head)) { + window = list_last_entry(head, struct scif_window, list); + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req > end_va_window) + return -ENXIO; + } + list_for_each_safe(item, temp, head) { + window = list_entry(item, struct scif_window, list); + start_va_window = window->va_for_temp; + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req < start_va_window && + end_va_req < start_va_window) + break; + if (start_va_req >= end_va_window) + continue; + if ((window->prot & req->prot) == req->prot) { + if (start_va_req >= start_va_window && + end_va_req <= end_va_window) { + *req->out_window = window; + return 0; + } + /* expand window */ + if (start_va_req < start_va_window) { + req->nr_bytes += + start_va_window - start_va_req; + req->va_for_temp = start_va_window; + } + if (end_va_req >= end_va_window) + req->nr_bytes += end_va_window - end_va_req; + } + /* Destroy the old window to create a new one */ + __scif_rma_destroy_tcw_helper(window); + break; + } + return -ENXIO; +} + +/* + * scif_query_window: + * + * Query the registration list and check if a valid contiguous + * range of windows exist. + * RMA lock must be held. + */ +int scif_query_window(struct scif_rma_req *req) +{ + struct list_head *item; + struct scif_window *window; + s64 end_offset, offset = req->offset; + u64 tmp_min, nr_bytes_left = req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + + list_for_each(item, req->head) { + window = list_entry(item, struct scif_window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + if (offset < window->offset) + /* Offset not found! */ + return -ENXIO; + if (offset >= end_offset) + continue; + /* Check read/write protections. */ + if ((window->prot & req->prot) != req->prot) + return -EPERM; + if (nr_bytes_left == req->nr_bytes) + /* Store the first window */ + *req->out_window = window; + tmp_min = min((u64)end_offset - offset, nr_bytes_left); + nr_bytes_left -= tmp_min; + offset += tmp_min; + /* + * Range requested encompasses + * multiple windows contiguously. + */ + if (!nr_bytes_left) { + /* Done for partial window */ + if (req->type == SCIF_WINDOW_PARTIAL || + req->type == SCIF_WINDOW_SINGLE) + return 0; + /* Extra logic for full windows */ + if (offset == end_offset) + /* Spanning multiple whole windows */ + return 0; + /* Not spanning multiple whole windows */ + return -ENXIO; + } + if (req->type == SCIF_WINDOW_SINGLE) + break; + } + dev_err(scif_info.mdev.this_device, + "%s %d ENXIO\n", __func__, __LINE__); + return -ENXIO; +} + +/* + * scif_rma_list_unregister: + * + * Traverse the self registration list starting from window: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_rma_list_unregister(struct scif_window *window, + s64 offset, int nr_pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + struct list_head *head = &ep->rma_info.reg_list; + s64 end_offset; + int err = 0; + int loop_nr_pages; + struct scif_window *_window; + + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT), + nr_pages); + err = scif_unregister_window(window); + if (err) + return err; + nr_pages -= loop_nr_pages; + offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages) + break; + } + return 0; +} + +/* + * scif_unmap_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Delete any DMA mappings created + */ +void scif_unmap_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + scif_unmap_window(ep->remote_dev, window); + } + mutex_unlock(&ep->rma_info.rma_lock); +} + +/* + * scif_unregister_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_unregister_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + int err = 0; + + mutex_lock(&ep->rma_info.rma_lock); +retry: + item = NULL; + tmp = NULL; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + ep->rma_info.async_list_del = 0; + err = scif_unregister_window(window); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", + __func__, __LINE__, err); + /* + * Need to restart list traversal if there has been + * an asynchronous list entry deletion. + */ + if (ACCESS_ONCE(ep->rma_info.async_list_del)) + goto retry; + } + mutex_unlock(&ep->rma_info.rma_lock); + if (!list_empty(&ep->rma_info.mmn_list)) { + spin_lock(&scif_info.rmalock); + list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.mmu_notif_work); + } + return err; +} diff --git a/kernel/drivers/misc/mic/scif/scif_rma_list.h b/kernel/drivers/misc/mic/scif/scif_rma_list.h new file mode 100644 index 000000000..7d58d1d55 --- /dev/null +++ b/kernel/drivers/misc/mic/scif/scif_rma_list.h @@ -0,0 +1,57 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_LIST_H +#define SCIF_RMA_LIST_H + +/* + * struct scif_rma_req - Self Registration list RMA Request query + * + * @out_window - Returns the window if found + * @offset: Starting offset + * @nr_bytes: number of bytes + * @prot: protection requested i.e. read or write or both + * @type: Specify single, partial or multiple windows + * @head: Head of list on which to search + * @va_for_temp: VA for searching temporary cached windows + */ +struct scif_rma_req { + struct scif_window **out_window; + union { + s64 offset; + unsigned long va_for_temp; + }; + size_t nr_bytes; + int prot; + enum scif_window_type type; + struct list_head *head; +}; + +/* Insert */ +void scif_insert_window(struct scif_window *window, struct list_head *head); +void scif_insert_tcw(struct scif_window *window, + struct list_head *head); +/* Query */ +int scif_query_window(struct scif_rma_req *request); +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request); +/* Called from close to unregister all self windows */ +int scif_unregister_all_windows(scif_epd_t epd); +void scif_unmap_all_windows(scif_epd_t epd); +/* Traverse list and unregister */ +int scif_rma_list_unregister(struct scif_window *window, s64 offset, + int nr_pages); +#endif /* SCIF_RMA_LIST_H */ diff --git a/kernel/drivers/misc/qcom-coincell.c b/kernel/drivers/misc/qcom-coincell.c new file mode 100644 index 000000000..7b4a2da48 --- /dev/null +++ b/kernel/drivers/misc/qcom-coincell.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/regmap.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +struct qcom_coincell { + struct device *dev; + struct regmap *regmap; + u32 base_addr; +}; + +#define QCOM_COINCELL_REG_RSET 0x44 +#define QCOM_COINCELL_REG_VSET 0x45 +#define QCOM_COINCELL_REG_ENABLE 0x46 + +#define QCOM_COINCELL_ENABLE BIT(7) + +static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 }; +static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 }; +/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */ + +/* if enable==0, rset and vset are ignored */ +static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset, + int vset, bool enable) +{ + int i, j, rc; + + /* if disabling, just do that and skip other operations */ + if (!enable) + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, 0); + + /* find index for current-limiting resistor */ + for (i = 0; i < ARRAY_SIZE(qcom_rset_map); i++) + if (rset == qcom_rset_map[i]) + break; + + if (i >= ARRAY_SIZE(qcom_rset_map)) { + dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset); + return -EINVAL; + } + + /* find index for charge voltage */ + for (j = 0; j < ARRAY_SIZE(qcom_vset_map); j++) + if (vset == qcom_vset_map[j]) + break; + + if (j >= ARRAY_SIZE(qcom_vset_map)) { + dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset); + return -EINVAL; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_RSET, i); + if (rc) { + /* + * This is mainly to flag a bad base_addr (reg) from dts. + * Other failures writing to the registers should be + * extremely rare, or indicative of problems that + * should be reported elsewhere (eg. spmi failure). + */ + dev_err(chgr->dev, "could not write to RSET register\n"); + return rc; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_VSET, j); + if (rc) + return rc; + + /* set 'enable' register */ + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, + QCOM_COINCELL_ENABLE); +} + +static int qcom_coincell_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct qcom_coincell chgr; + u32 rset, vset; + bool enable; + int rc; + + chgr.dev = &pdev->dev; + + chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!chgr.regmap) { + dev_err(chgr.dev, "Unable to get regmap\n"); + return -EINVAL; + } + + rc = of_property_read_u32(node, "reg", &chgr.base_addr); + if (rc) + return rc; + + enable = !of_property_read_bool(node, "qcom,charger-disable"); + + if (enable) { + rc = of_property_read_u32(node, "qcom,rset-ohms", &rset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,rset-ohms' in DT block"); + return rc; + } + + rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,vset-millivolts' in DT block"); + return rc; + } + } + + return qcom_coincell_chgr_config(&chgr, rset, vset, enable); +} + +static const struct of_device_id qcom_coincell_match_table[] = { + { .compatible = "qcom,pm8941-coincell", }, + {} +}; + +MODULE_DEVICE_TABLE(of, qcom_coincell_match_table); + +static struct platform_driver qcom_coincell_driver = { + .driver = { + .name = "qcom-spmi-coincell", + .of_match_table = qcom_coincell_match_table, + }, + .probe = qcom_coincell_probe, +}; + +module_platform_driver(qcom_coincell_driver); + +MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.c b/kernel/drivers/misc/sgi-gru/gruhandles.c index 2f30badc6..1ee8e82ba 100644 --- a/kernel/drivers/misc/sgi-gru/gruhandles.c +++ b/kernel/drivers/misc/sgi-gru/gruhandles.c @@ -196,12 +196,6 @@ void tfh_write_restart(struct gru_tlb_fault_handle *tfh, start_instruction(tfh); } -void tfh_restart(struct gru_tlb_fault_handle *tfh) -{ - tfh->opc = TFHOP_RESTART; - start_instruction(tfh); -} - void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) { tfh->opc = TFHOP_USER_POLLING_MODE; diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.h b/kernel/drivers/misc/sgi-gru/gruhandles.h index 3f998b924..3d7bd36a1 100644 --- a/kernel/drivers/misc/sgi-gru/gruhandles.h +++ b/kernel/drivers/misc/sgi-gru/gruhandles.h @@ -524,7 +524,6 @@ int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); -void tfh_restart(struct gru_tlb_fault_handle *tfh); void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); void tfh_exception(struct gru_tlb_fault_handle *tfh); diff --git a/kernel/drivers/misc/sgi-gru/grukdump.c b/kernel/drivers/misc/sgi-gru/grukdump.c index a3700a56b..313da3150 100644 --- a/kernel/drivers/misc/sgi-gru/grukdump.c +++ b/kernel/drivers/misc/sgi-gru/grukdump.c @@ -78,11 +78,10 @@ static int gru_dump_tfm(struct gru_state *gru, void __user *ubuf, void __user *ubufend) { struct gru_tlb_fault_map *tfm; - int i, ret, bytes; + int i; - bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; - if (bytes > ubufend - ubuf) - ret = -EFBIG; + if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; for (i = 0; i < GRU_NUM_TFM; i++) { tfm = get_tfm(gru->gs_gru_base_vaddr, i); @@ -99,11 +98,10 @@ static int gru_dump_tgh(struct gru_state *gru, void __user *ubuf, void __user *ubufend) { struct gru_tlb_global_handle *tgh; - int i, ret, bytes; + int i; - bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; - if (bytes > ubufend - ubuf) - ret = -EFBIG; + if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; for (i = 0; i < GRU_NUM_TGH; i++) { tgh = get_tgh(gru->gs_gru_base_vaddr, i); @@ -196,7 +194,7 @@ int gru_dump_chiplet_request(unsigned long arg) return -EFAULT; /* Currently, only dump by gid is implemented */ - if (req.gid >= gru_max_gids || req.gid < 0) + if (req.gid >= gru_max_gids) return -EINVAL; gru = GID_TO_GRU(req.gid); diff --git a/kernel/drivers/misc/sgi-gru/grukservices.c b/kernel/drivers/misc/sgi-gru/grukservices.c index 913de07e5..967b9dd24 100644 --- a/kernel/drivers/misc/sgi-gru/grukservices.c +++ b/kernel/drivers/misc/sgi-gru/grukservices.c @@ -160,7 +160,12 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) down_write(&bs->bs_kgts_sema); if (!bs->bs_kgts) { - bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + do { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + if (!IS_ERR(bs->bs_kgts)) + break; + msleep(1); + } while (true); bs->bs_kgts->ts_user_blade_id = blade_id; } kgts = bs->bs_kgts; @@ -429,8 +434,8 @@ int gru_get_cb_exception_detail(void *cb, return 0; } -char *gru_get_cb_exception_detail_str(int ret, void *cb, - char *buf, int size) +static char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) { struct gru_control_block_status *gen = (void *)cb; struct control_block_extended_exc_detail excdet; @@ -505,7 +510,7 @@ int gru_wait_proc(void *cb) return ret; } -void gru_abort(int ret, void *cb, char *str) +static void gru_abort(int ret, void *cb, char *str) { char buf[GRU_EXC_STR_SIZE]; @@ -997,7 +1002,6 @@ static int quicktest1(unsigned long arg) { struct gru_message_queue_desc mqd; void *p, *mq; - unsigned long *dw; int i, ret = -EIO; char mes[GRU_CACHE_LINE_BYTES], *m; @@ -1007,7 +1011,6 @@ static int quicktest1(unsigned long arg) return -ENOMEM; mq = ALIGNUP(p, 1024); memset(mes, 0xee, sizeof(mes)); - dw = mq; gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); for (i = 0; i < 6; i++) { diff --git a/kernel/drivers/misc/sgi-gru/grumain.c b/kernel/drivers/misc/sgi-gru/grumain.c index ae16c8cb4..1525870f4 100644 --- a/kernel/drivers/misc/sgi-gru/grumain.c +++ b/kernel/drivers/misc/sgi-gru/grumain.c @@ -930,6 +930,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct gru_thread_state *gts; unsigned long paddr, vaddr; + unsigned long expires; vaddr = (unsigned long)vmf->virtual_address; gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", @@ -954,7 +955,8 @@ again: mutex_unlock(>s->ts_ctxlock); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ - if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) + expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY; + if (time_before(expires, jiffies)) gru_steal_context(gts); goto again; } diff --git a/kernel/drivers/misc/sgi-gru/grutlbpurge.c b/kernel/drivers/misc/sgi-gru/grutlbpurge.c index 2129274ef..e936d4389 100644 --- a/kernel/drivers/misc/sgi-gru/grutlbpurge.c +++ b/kernel/drivers/misc/sgi-gru/grutlbpurge.c @@ -306,19 +306,20 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) atomic_inc(&gms->ms_refcnt); } else { gms = kzalloc(sizeof(*gms), GFP_KERNEL); - if (gms) { - STAT(gms_alloc); - spin_lock_init(&gms->ms_asid_lock); - gms->ms_notifier.ops = &gru_mmuops; - atomic_set(&gms->ms_refcnt, 1); - init_waitqueue_head(&gms->ms_wait_queue); - err = __mmu_notifier_register(&gms->ms_notifier, current->mm); - if (err) - goto error; - } + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + gms->ms_notifier.ops = &gru_mmuops; + atomic_set(&gms->ms_refcnt, 1); + init_waitqueue_head(&gms->ms_wait_queue); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; } - gru_dbg(grudev, "gms %p, refcnt %d\n", gms, - atomic_read(&gms->ms_refcnt)); + if (gms) + gru_dbg(grudev, "gms %p, refcnt %d\n", gms, + atomic_read(&gms->ms_refcnt)); return gms; error: kfree(gms); diff --git a/kernel/drivers/misc/sgi-xp/xpc_uv.c b/kernel/drivers/misc/sgi-xp/xpc_uv.c index 95c894482..340b44d9e 100644 --- a/kernel/drivers/misc/sgi-xp/xpc_uv.c +++ b/kernel/drivers/misc/sgi-xp/xpc_uv.c @@ -239,7 +239,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, mq->mmr_blade = uv_cpu_to_blade_id(cpu); nid = cpu_to_node(cpu); - page = alloc_pages_exact_node(nid, + page = __alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, pg_order); if (page == NULL) { diff --git a/kernel/drivers/misc/spear13xx_pcie_gadget.c b/kernel/drivers/misc/spear13xx_pcie_gadget.c index fe3ad0ca9..ee120dcbb 100644 --- a/kernel/drivers/misc/spear13xx_pcie_gadget.c +++ b/kernel/drivers/misc/spear13xx_pcie_gadget.c @@ -2,7 +2,7 @@ * drivers/misc/spear13xx_pcie_gadget.c * * Copyright (C) 2010 ST Microelectronics - * Pratyush Anand<pratyush.anand@st.com> + * Pratyush Anand<pratyush.anand@gmail.com> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -220,11 +220,17 @@ static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id) /* * configfs interfaces show/store functions */ -static ssize_t pcie_gadget_show_link( - struct spear_pcie_gadget_config *config, - char *buf) + +static struct pcie_gadget_target *to_target(struct config_item *item) { - struct pcie_app_reg __iomem *app_reg = config->va_app_base; + return item ? + container_of(to_configfs_subsystem(to_config_group(item)), + struct pcie_gadget_target, subsys) : NULL; +} + +static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf) +{ + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID)) return sprintf(buf, "UP"); @@ -232,11 +238,10 @@ static ssize_t pcie_gadget_show_link( return sprintf(buf, "DOWN"); } -static ssize_t pcie_gadget_store_link( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_link_store(struct config_item *item, const char *buf, size_t count) { - struct pcie_app_reg __iomem *app_reg = config->va_app_base; + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; if (sysfs_streq(buf, "UP")) writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID), @@ -250,17 +255,15 @@ static ssize_t pcie_gadget_store_link( return count; } -static ssize_t pcie_gadget_show_int_type( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf) { - return sprintf(buf, "%s", config->int_type); + return sprintf(buf, "%s", to_target(item)->int_type); } -static ssize_t pcie_gadget_store_int_type( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_int_type_store(struct config_item *item, const char *buf, size_t count) { + struct spear_pcie_gadget_config *config = to_target(item) u32 cap, vec, flags; ulong vector; @@ -288,11 +291,10 @@ static ssize_t pcie_gadget_store_int_type( return count; } -static ssize_t pcie_gadget_show_no_of_msi( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf) { - struct pcie_app_reg __iomem *app_reg = config->va_app_base; + struct spear_pcie_gadget_config *config = to_target(item) + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; u32 cap, vec, flags; ulong vector; @@ -313,13 +315,12 @@ static ssize_t pcie_gadget_show_no_of_msi( return sprintf(buf, "%lu", vector); } -static ssize_t pcie_gadget_store_no_of_msi( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item, const char *buf, size_t count) { int ret; - ret = kstrtoul(buf, 0, &config->requested_msi); + ret = kstrtoul(buf, 0, &to_target(item)->requested_msi); if (ret) return ret; @@ -329,11 +330,10 @@ static ssize_t pcie_gadget_store_no_of_msi( return count; } -static ssize_t pcie_gadget_store_inta( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_inta_store(struct config_item *item, const char *buf, size_t count) { - struct pcie_app_reg __iomem *app_reg = config->va_app_base; + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; ulong en; int ret; @@ -351,10 +351,10 @@ static ssize_t pcie_gadget_store_inta( return count; } -static ssize_t pcie_gadget_store_send_msi( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_send_msi_store(struct config_item *item, const char *buf, size_t count) { + struct spear_pcie_gadget_config *config = to_target(item) struct pcie_app_reg __iomem *app_reg = config->va_app_base; ulong vector; u32 ven_msi; @@ -388,19 +388,16 @@ static ssize_t pcie_gadget_store_send_msi( return count; } -static ssize_t pcie_gadget_show_vendor_id( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf) { u32 id; - spear_dbi_read_reg(config, PCI_VENDOR_ID, 2, &id); + spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id); return sprintf(buf, "%x", id); } -static ssize_t pcie_gadget_store_vendor_id( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_vendor_id_store(struct config_item *item, const char *buf, size_t count) { ulong id; @@ -410,24 +407,21 @@ static ssize_t pcie_gadget_store_vendor_id( if (ret) return ret; - spear_dbi_write_reg(config, PCI_VENDOR_ID, 2, id); + spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id); return count; } -static ssize_t pcie_gadget_show_device_id( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf) { u32 id; - spear_dbi_read_reg(config, PCI_DEVICE_ID, 2, &id); + spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id); return sprintf(buf, "%x", id); } -static ssize_t pcie_gadget_store_device_id( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_device_id_store(struct config_item *item, const char *buf, size_t count) { ulong id; @@ -437,22 +431,20 @@ static ssize_t pcie_gadget_store_device_id( if (ret) return ret; - spear_dbi_write_reg(config, PCI_DEVICE_ID, 2, id); + spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id); return count; } -static ssize_t pcie_gadget_show_bar0_size( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf) { - return sprintf(buf, "%lx", config->bar0_size); + return sprintf(buf, "%lx", to_target(item)->bar0_size); } -static ssize_t pcie_gadget_store_bar0_size( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_size_store(struct config_item *item, const char *buf, size_t count) { + struct spear_pcie_gadget_config *config = to_target(item) ulong size; u32 pos, pos1; u32 no_of_bit = 0; @@ -489,21 +481,20 @@ static ssize_t pcie_gadget_store_bar0_size( return count; } -static ssize_t pcie_gadget_show_bar0_address( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_address_show(struct config_item *item, char *buf) { - struct pcie_app_reg __iomem *app_reg = config->va_app_base; + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; u32 address = readl(&app_reg->pim0_mem_addr_start); return sprintf(buf, "%x", address); } -static ssize_t pcie_gadget_store_bar0_address( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_address_store(struct config_item *item, const char *buf, size_t count) { + struct spear_pcie_gadget_config *config = to_target(item) struct pcie_app_reg __iomem *app_reg = config->va_app_base; ulong address; int ret; @@ -524,15 +515,13 @@ static ssize_t pcie_gadget_store_bar0_address( return count; } -static ssize_t pcie_gadget_show_bar0_rw_offset( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item, char *buf) { - return sprintf(buf, "%lx", config->bar0_rw_offset); + return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset); } -static ssize_t pcie_gadget_store_bar0_rw_offset( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item, const char *buf, size_t count) { ulong offset; @@ -545,15 +534,14 @@ static ssize_t pcie_gadget_store_bar0_rw_offset( if (offset % 4) return -EINVAL; - config->bar0_rw_offset = offset; + to_target(item)->bar0_rw_offset = offset; return count; } -static ssize_t pcie_gadget_show_bar0_data( - struct spear_pcie_gadget_config *config, - char *buf) +static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf) { + struct spear_pcie_gadget_config *config = to_target(item) ulong data; if (!config->va_bar0_address) @@ -564,10 +552,10 @@ static ssize_t pcie_gadget_show_bar0_data( return sprintf(buf, "%lx", data); } -static ssize_t pcie_gadget_store_bar0_data( - struct spear_pcie_gadget_config *config, +static ssize_t pcie_gadget_bar0_data_store(struct config_item *item, const char *buf, size_t count) { + struct spear_pcie_gadget_config *config = to_target(item) ulong data; int ret; @@ -583,97 +571,35 @@ static ssize_t pcie_gadget_store_bar0_data( return count; } -/* - * Attribute definitions. - */ - -#define PCIE_GADGET_TARGET_ATTR_RO(_name) \ -static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ - __CONFIGFS_ATTR(_name, S_IRUGO, pcie_gadget_show_##_name, NULL) - -#define PCIE_GADGET_TARGET_ATTR_WO(_name) \ -static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ - __CONFIGFS_ATTR(_name, S_IWUSR, NULL, pcie_gadget_store_##_name) - -#define PCIE_GADGET_TARGET_ATTR_RW(_name) \ -static struct pcie_gadget_target_attr pcie_gadget_target_##_name = \ - __CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, pcie_gadget_show_##_name, \ - pcie_gadget_store_##_name) -PCIE_GADGET_TARGET_ATTR_RW(link); -PCIE_GADGET_TARGET_ATTR_RW(int_type); -PCIE_GADGET_TARGET_ATTR_RW(no_of_msi); -PCIE_GADGET_TARGET_ATTR_WO(inta); -PCIE_GADGET_TARGET_ATTR_WO(send_msi); -PCIE_GADGET_TARGET_ATTR_RW(vendor_id); -PCIE_GADGET_TARGET_ATTR_RW(device_id); -PCIE_GADGET_TARGET_ATTR_RW(bar0_size); -PCIE_GADGET_TARGET_ATTR_RW(bar0_address); -PCIE_GADGET_TARGET_ATTR_RW(bar0_rw_offset); -PCIE_GADGET_TARGET_ATTR_RW(bar0_data); +CONFIGFS_ATTR(pcie_gadget_, link); +CONFIGFS_ATTR(pcie_gadget_, int_type); +CONFIGFS_ATTR(pcie_gadget_, no_of_msi); +CONFIGFS_ATTR_WO(pcie_gadget_, inta); +CONFIGFS_ATTR_WO(pcie_gadget_, send_msi); +CONFIGFS_ATTR(pcie_gadget_, vendor_id); +CONFIGFS_ATTR(pcie_gadget_, device_id); +CONFIGFS_ATTR(pcie_gadget_, bar0_size); +CONFIGFS_ATTR(pcie_gadget_, bar0_address); +CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset); +CONFIGFS_ATTR(pcie_gadget_, bar0_data); static struct configfs_attribute *pcie_gadget_target_attrs[] = { - &pcie_gadget_target_link.attr, - &pcie_gadget_target_int_type.attr, - &pcie_gadget_target_no_of_msi.attr, - &pcie_gadget_target_inta.attr, - &pcie_gadget_target_send_msi.attr, - &pcie_gadget_target_vendor_id.attr, - &pcie_gadget_target_device_id.attr, - &pcie_gadget_target_bar0_size.attr, - &pcie_gadget_target_bar0_address.attr, - &pcie_gadget_target_bar0_rw_offset.attr, - &pcie_gadget_target_bar0_data.attr, + &pcie_gadget_attr_link, + &pcie_gadget_attr_int_type, + &pcie_gadget_attr_no_of_msi, + &pcie_gadget_attr_inta, + &pcie_gadget_attr_send_msi, + &pcie_gadget_attr_vendor_id, + &pcie_gadget_attr_device_id, + &pcie_gadget_attr_bar0_size, + &pcie_gadget_attr_bar0_address, + &pcie_gadget_attr_bar0_rw_offset, + &pcie_gadget_attr_bar0_data, NULL, }; -static struct pcie_gadget_target *to_target(struct config_item *item) -{ - return item ? - container_of(to_configfs_subsystem(to_config_group(item)), - struct pcie_gadget_target, subsys) : NULL; -} - -/* - * Item operations and type for pcie_gadget_target. - */ - -static ssize_t pcie_gadget_target_attr_show(struct config_item *item, - struct configfs_attribute *attr, - char *buf) -{ - ssize_t ret = -EINVAL; - struct pcie_gadget_target *target = to_target(item); - struct pcie_gadget_target_attr *t_attr = - container_of(attr, struct pcie_gadget_target_attr, attr); - - if (t_attr->show) - ret = t_attr->show(&target->config, buf); - return ret; -} - -static ssize_t pcie_gadget_target_attr_store(struct config_item *item, - struct configfs_attribute *attr, - const char *buf, - size_t count) -{ - ssize_t ret = -EINVAL; - struct pcie_gadget_target *target = to_target(item); - struct pcie_gadget_target_attr *t_attr = - container_of(attr, struct pcie_gadget_target_attr, attr); - - if (t_attr->store) - ret = t_attr->store(&target->config, buf, count); - return ret; -} - -static struct configfs_item_operations pcie_gadget_target_item_ops = { - .show_attribute = pcie_gadget_target_attr_show, - .store_attribute = pcie_gadget_target_attr_store, -}; - static struct config_item_type pcie_gadget_target_type = { .ct_attrs = pcie_gadget_target_attrs, - .ct_item_ops = &pcie_gadget_target_item_ops, .ct_owner = THIS_MODULE, }; diff --git a/kernel/drivers/misc/sram.c b/kernel/drivers/misc/sram.c index eeaaf5fca..736dae715 100644 --- a/kernel/drivers/misc/sram.c +++ b/kernel/drivers/misc/sram.c @@ -18,33 +18,154 @@ * MA 02110-1301, USA. */ -#include <linux/kernel.h> -#include <linux/init.h> #include <linux/clk.h> -#include <linux/err.h> +#include <linux/genalloc.h> #include <linux/io.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/list.h> #include <linux/list_sort.h> +#include <linux/of_address.h> #include <linux/platform_device.h> #include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/genalloc.h> #define SRAM_GRANULARITY 32 +struct sram_partition { + void __iomem *base; + + struct gen_pool *pool; + struct bin_attribute battr; + struct mutex lock; +}; + struct sram_dev { + struct device *dev; + void __iomem *virt_base; + struct gen_pool *pool; struct clk *clk; + + struct sram_partition *partition; + u32 partitions; }; struct sram_reserve { struct list_head list; u32 start; u32 size; + bool export; + bool pool; + const char *label; }; +static ssize_t sram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_fromio(buf, part->base + pos, count); + mutex_unlock(&part->lock); + + return count; +} + +static ssize_t sram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_toio(part->base + pos, buf, count); + mutex_unlock(&part->lock); + + return count; +} + +static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + int ret; + + part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, block->label); + if (IS_ERR(part->pool)) + return PTR_ERR(part->pool); + + ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start, + block->size, NUMA_NO_NODE); + if (ret < 0) { + dev_err(sram->dev, "failed to register subpool: %d\n", ret); + return ret; + } + + return 0; +} + +static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + sysfs_bin_attr_init(&part->battr); + part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL, + "%llx.sram", + (unsigned long long)start); + if (!part->battr.attr.name) + return -ENOMEM; + + part->battr.attr.mode = S_IRUSR | S_IWUSR; + part->battr.read = sram_read; + part->battr.write = sram_write; + part->battr.size = block->size; + + return device_create_bin_file(sram->dev, &part->battr); +} + +static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start) +{ + int ret; + struct sram_partition *part = &sram->partition[sram->partitions]; + + mutex_init(&part->lock); + part->base = sram->virt_base + block->start; + + if (block->pool) { + ret = sram_add_pool(sram, block, start, part); + if (ret) + return ret; + } + if (block->export) { + ret = sram_add_export(sram, block, start, part); + if (ret) + return ret; + } + sram->partitions++; + + return 0; +} + +static void sram_free_partitions(struct sram_dev *sram) +{ + struct sram_partition *part; + + if (!sram->partitions) + return; + + part = &sram->partition[sram->partitions - 1]; + for (; sram->partitions; sram->partitions--, part--) { + if (part->battr.size) + device_remove_bin_file(sram->dev, &part->battr); + + if (part->pool && + gen_pool_avail(part->pool) < gen_pool_size(part->pool)) + dev_err(sram->dev, "removed pool while SRAM allocated\n"); + } +} + static int sram_reserve_cmp(void *priv, struct list_head *a, struct list_head *b) { @@ -54,62 +175,28 @@ static int sram_reserve_cmp(void *priv, struct list_head *a, return ra->start - rb->start; } -static int sram_probe(struct platform_device *pdev) +static int sram_reserve_regions(struct sram_dev *sram, struct resource *res) { - void __iomem *virt_base; - struct sram_dev *sram; - struct resource *res; - struct device_node *np = pdev->dev.of_node, *child; + struct device_node *np = sram->dev->of_node, *child; unsigned long size, cur_start, cur_size; struct sram_reserve *rblocks, *block; struct list_head reserve_list; - unsigned int nblocks; - int ret; + unsigned int nblocks, exports = 0; + const char *label; + int ret = 0; INIT_LIST_HEAD(&reserve_list); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "found no memory resource\n"); - return -EINVAL; - } - size = resource_size(res); - if (!devm_request_mem_region(&pdev->dev, - res->start, size, pdev->name)) { - dev_err(&pdev->dev, "could not request region for resource\n"); - return -EBUSY; - } - - virt_base = devm_ioremap_wc(&pdev->dev, res->start, size); - if (IS_ERR(virt_base)) - return PTR_ERR(virt_base); - - sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); - if (!sram) - return -ENOMEM; - - sram->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(sram->clk)) - sram->clk = NULL; - else - clk_prepare_enable(sram->clk); - - sram->pool = devm_gen_pool_create(&pdev->dev, ilog2(SRAM_GRANULARITY), -1); - if (!sram->pool) - return -ENOMEM; - /* * We need an additional block to mark the end of the memory region * after the reserved blocks from the dt are processed. */ nblocks = (np) ? of_get_available_child_count(np) + 1 : 1; - rblocks = kmalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL); - if (!rblocks) { - ret = -ENOMEM; - goto err_alloc; - } + rblocks = kzalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL); + if (!rblocks) + return -ENOMEM; block = &rblocks[0]; for_each_available_child_of_node(np, child) { @@ -117,14 +204,14 @@ static int sram_probe(struct platform_device *pdev) ret = of_address_to_resource(child, 0, &child_res); if (ret < 0) { - dev_err(&pdev->dev, + dev_err(sram->dev, "could not get address for node %s\n", child->full_name); goto err_chunks; } if (child_res.start < res->start || child_res.end > res->end) { - dev_err(&pdev->dev, + dev_err(sram->dev, "reserved block %s outside the sram area\n", child->full_name); ret = -EINVAL; @@ -135,12 +222,42 @@ static int sram_probe(struct platform_device *pdev) block->size = resource_size(&child_res); list_add_tail(&block->list, &reserve_list); - dev_dbg(&pdev->dev, "found reserved block 0x%x-0x%x\n", - block->start, - block->start + block->size); + if (of_find_property(child, "export", NULL)) + block->export = true; + + if (of_find_property(child, "pool", NULL)) + block->pool = true; + + if ((block->export || block->pool) && block->size) { + exports++; + + label = NULL; + ret = of_property_read_string(child, "label", &label); + if (ret && ret != -EINVAL) { + dev_err(sram->dev, + "%s has invalid label name\n", + child->full_name); + goto err_chunks; + } + if (!label) + label = child->name; + + block->label = devm_kstrdup(sram->dev, + label, GFP_KERNEL); + if (!block->label) + goto err_chunks; + + dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n", + block->export ? "exported " : "", block->label, + block->start, block->start + block->size); + } else { + dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n", + block->start, block->start + block->size); + } block++; } + child = NULL; /* the last chunk marks the end of the region */ rblocks[nblocks - 1].start = size; @@ -149,18 +266,37 @@ static int sram_probe(struct platform_device *pdev) list_sort(NULL, &reserve_list, sram_reserve_cmp); - cur_start = 0; + if (exports) { + sram->partition = devm_kzalloc(sram->dev, + exports * sizeof(*sram->partition), + GFP_KERNEL); + if (!sram->partition) { + ret = -ENOMEM; + goto err_chunks; + } + } + cur_start = 0; list_for_each_entry(block, &reserve_list, list) { /* can only happen if sections overlap */ if (block->start < cur_start) { - dev_err(&pdev->dev, + dev_err(sram->dev, "block at 0x%x starts after current offset 0x%lx\n", block->start, cur_start); ret = -EINVAL; + sram_free_partitions(sram); goto err_chunks; } + if ((block->export || block->pool) && block->size) { + ret = sram_add_partition(sram, block, + res->start + block->start); + if (ret) { + sram_free_partitions(sram); + goto err_chunks; + } + } + /* current start is in a reserved block, so continue after it */ if (block->start == cur_start) { cur_start = block->start + block->size; @@ -174,40 +310,91 @@ static int sram_probe(struct platform_device *pdev) */ cur_size = block->start - cur_start; - dev_dbg(&pdev->dev, "adding chunk 0x%lx-0x%lx\n", + dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n", cur_start, cur_start + cur_size); + ret = gen_pool_add_virt(sram->pool, - (unsigned long)virt_base + cur_start, + (unsigned long)sram->virt_base + cur_start, res->start + cur_start, cur_size, -1); - if (ret < 0) + if (ret < 0) { + sram_free_partitions(sram); goto err_chunks; + } /* next allocation after this reserved block */ cur_start = block->start + block->size; } + err_chunks: + if (child) + of_node_put(child); + kfree(rblocks); + return ret; +} + +static int sram_probe(struct platform_device *pdev) +{ + struct sram_dev *sram; + struct resource *res; + size_t size; + int ret; + + sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); + if (!sram) + return -ENOMEM; + + sram->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(sram->dev, "found no memory resource\n"); + return -EINVAL; + } + + size = resource_size(res); + + if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) { + dev_err(sram->dev, "could not request region for resource\n"); + return -EBUSY; + } + + sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size); + if (IS_ERR(sram->virt_base)) + return PTR_ERR(sram->virt_base); + + sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, NULL); + if (IS_ERR(sram->pool)) + return PTR_ERR(sram->pool); + + ret = sram_reserve_regions(sram, res); + if (ret) + return ret; + + sram->clk = devm_clk_get(sram->dev, NULL); + if (IS_ERR(sram->clk)) + sram->clk = NULL; + else + clk_prepare_enable(sram->clk); + platform_set_drvdata(pdev, sram); - dev_dbg(&pdev->dev, "SRAM pool: %ld KiB @ 0x%p\n", size / 1024, virt_base); + dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n", + gen_pool_size(sram->pool) / 1024, sram->virt_base); return 0; - -err_chunks: - kfree(rblocks); -err_alloc: - if (sram->clk) - clk_disable_unprepare(sram->clk); - return ret; } static int sram_remove(struct platform_device *pdev) { struct sram_dev *sram = platform_get_drvdata(pdev); + sram_free_partitions(sram); + if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) - dev_dbg(&pdev->dev, "removed while SRAM allocated\n"); + dev_err(sram->dev, "removed while SRAM allocated\n"); if (sram->clk) clk_disable_unprepare(sram->clk); diff --git a/kernel/drivers/misc/ti-st/st_core.c b/kernel/drivers/misc/ti-st/st_core.c index c8c6a3630..6e3af8b42 100644 --- a/kernel/drivers/misc/ti-st/st_core.c +++ b/kernel/drivers/misc/ti-st/st_core.c @@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) * - TTY layer when write's finished * - st_write (in context of the protocol stack) */ +static void work_fn_write_wakeup(struct work_struct *work) +{ + struct st_data_s *st_gdata = container_of(work, struct st_data_s, + work_write_wakeup); + + st_tx_wakeup((void *)st_gdata); +} void st_tx_wakeup(struct st_data_s *st_data) { struct sk_buff *skb; @@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty) /* don't do an wakeup for now */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - /* call our internal wakeup */ - st_tx_wakeup((void *)st_gdata); + /* + * schedule the internal wakeup instead of calling directly to + * avoid lockup (port->lock needed in tty->ops->write is + * already taken here + */ + schedule_work(&st_gdata->work_write_wakeup); } static void st_tty_flush_buffer(struct tty_struct *tty) @@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data) pr_err("unable to un-register ldisc"); return err; } + + INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup); + *core_data = st_gdata; return 0; } diff --git a/kernel/drivers/misc/ti-st/st_kim.c b/kernel/drivers/misc/ti-st/st_kim.c index 18e7a0398..71b64550b 100644 --- a/kernel/drivers/misc/ti-st/st_kim.c +++ b/kernel/drivers/misc/ti-st/st_kim.c @@ -36,8 +36,6 @@ #include <linux/skbuff.h> #include <linux/ti_wilink_st.h> #include <linux/module.h> -#include <linux/of.h> -#include <linux/of_device.h> #define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */ static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; @@ -45,9 +43,6 @@ static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; /**********************************************************************/ /* internal functions */ -struct ti_st_plat_data *dt_pdata; -static struct ti_st_plat_data *get_platform_data(struct device *dev); - /** * st_get_plat_device - * function which returns the reference to the platform device @@ -469,12 +464,7 @@ long st_kim_start(void *kim_data) struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; pr_info(" %s", __func__); - if (kim_gdata->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_gdata->kim_pdev->dev.platform_data; - } + pdata = kim_gdata->kim_pdev->dev.platform_data; do { /* platform specific enabling code here */ @@ -482,9 +472,9 @@ long st_kim_start(void *kim_data) pdata->chip_enable(kim_gdata); /* Configure BT nShutdown to HIGH state */ - gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); mdelay(5); /* FIXME: a proper toggle */ - gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); mdelay(100); /* re-initialize the completion */ reinit_completion(&kim_gdata->ldisc_installed); @@ -534,18 +524,12 @@ long st_kim_stop(void *kim_data) { long err = 0; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; - struct ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; struct tty_struct *tty = kim_gdata->core_data->tty; reinit_completion(&kim_gdata->ldisc_installed); - if (kim_gdata->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else - pdata = kim_gdata->kim_pdev->dev.platform_data; - - if (tty) { /* can be called before ldisc is installed */ /* Flush any pending characters in the driver and discipline. */ tty_ldisc_flush(tty); @@ -566,11 +550,11 @@ long st_kim_stop(void *kim_data) } /* By default configure BT nShutdown to LOW state */ - gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); mdelay(1); - gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); mdelay(1); - gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); /* platform specific disable */ if (pdata->chip_disable) @@ -737,53 +721,13 @@ static const struct file_operations list_debugfs_fops = { * board-*.c file */ -static const struct of_device_id kim_of_match[] = { -{ - .compatible = "kim", - }, - {} -}; -MODULE_DEVICE_TABLE(of, kim_of_match); - -static struct ti_st_plat_data *get_platform_data(struct device *dev) -{ - struct device_node *np = dev->of_node; - const u32 *dt_property; - int len; - - dt_pdata = kzalloc(sizeof(*dt_pdata), GFP_KERNEL); - - if (!dt_pdata) - pr_err("Can't allocate device_tree platform data\n"); - - dt_property = of_get_property(np, "dev_name", &len); - if (dt_property) - memcpy(&dt_pdata->dev_name, dt_property, len); - of_property_read_u32(np, "nshutdown_gpio", - &dt_pdata->nshutdown_gpio); - of_property_read_u32(np, "flow_cntrl", &dt_pdata->flow_cntrl); - of_property_read_u32(np, "baud_rate", &dt_pdata->baud_rate); - - return dt_pdata; -} - static struct dentry *kim_debugfs_dir; static int kim_probe(struct platform_device *pdev) { struct kim_data_s *kim_gdata; - struct ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; int err; - if (pdev->dev.of_node) - pdata = get_platform_data(&pdev->dev); - else - pdata = pdev->dev.platform_data; - - if (pdata == NULL) { - dev_err(&pdev->dev, "Platform Data is missing\n"); - return -ENXIO; - } - if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) { /* multiple devices could exist */ st_kim_devices[pdev->id] = pdev; @@ -864,16 +808,9 @@ err_core_init: static int kim_remove(struct platform_device *pdev) { /* free the GPIOs requested */ - struct ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; struct kim_data_s *kim_gdata; - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } - kim_gdata = platform_get_drvdata(pdev); /* Free the Bluetooth/FM/GPIO @@ -891,22 +828,12 @@ static int kim_remove(struct platform_device *pdev) kfree(kim_gdata); kim_gdata = NULL; - kfree(dt_pdata); - dt_pdata = NULL; - return 0; } static int kim_suspend(struct platform_device *pdev, pm_message_t state) { - struct ti_st_plat_data *pdata; - - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } + struct ti_st_plat_data *pdata = pdev->dev.platform_data; if (pdata->suspend) return pdata->suspend(pdev, state); @@ -916,14 +843,7 @@ static int kim_suspend(struct platform_device *pdev, pm_message_t state) static int kim_resume(struct platform_device *pdev) { - struct ti_st_plat_data *pdata; - - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } + struct ti_st_plat_data *pdata = pdev->dev.platform_data; if (pdata->resume) return pdata->resume(pdev); @@ -940,8 +860,6 @@ static struct platform_driver kim_platform_driver = { .resume = kim_resume, .driver = { .name = "kim", - .owner = THIS_MODULE, - .of_match_table = of_match_ptr(kim_of_match), }, }; diff --git a/kernel/drivers/misc/ti-st/st_ll.c b/kernel/drivers/misc/ti-st/st_ll.c index 518e1b7f2..93b4d67cc 100644 --- a/kernel/drivers/misc/ti-st/st_ll.c +++ b/kernel/drivers/misc/ti-st/st_ll.c @@ -26,7 +26,6 @@ #include <linux/ti_wilink_st.h> /**********************************************************************/ - /* internal functions */ static void send_ll_cmd(struct st_data_s *st_data, unsigned char cmd) @@ -54,13 +53,7 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data) /* communicate to platform about chip asleep */ kim_data = st_data->kim_data; - if (kim_data->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_data->kim_pdev->dev.platform_data; - } - + pdata = kim_data->kim_pdev->dev.platform_data; if (pdata->chip_asleep) pdata->chip_asleep(NULL); } @@ -93,13 +86,7 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data) /* communicate to platform about chip wakeup */ kim_data = st_data->kim_data; - if (kim_data->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_data->kim_pdev->dev.platform_data; - } - + pdata = kim_data->kim_pdev->dev.platform_data; if (pdata->chip_awake) pdata->chip_awake(NULL); } diff --git a/kernel/drivers/misc/ti_dac7512.c b/kernel/drivers/misc/ti_dac7512.c index cb0289b44..f5456fb7d 100644 --- a/kernel/drivers/misc/ti_dac7512.c +++ b/kernel/drivers/misc/ti_dac7512.c @@ -89,7 +89,6 @@ MODULE_DEVICE_TABLE(of, dac7512_of_match); static struct spi_driver dac7512_driver = { .driver = { .name = "dac7512", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(dac7512_of_match), }, .probe = dac7512_probe, diff --git a/kernel/drivers/misc/tsl2550.c b/kernel/drivers/misc/tsl2550.c index b00335652..87a13374f 100644 --- a/kernel/drivers/misc/tsl2550.c +++ b/kernel/drivers/misc/tsl2550.c @@ -446,7 +446,6 @@ MODULE_DEVICE_TABLE(i2c, tsl2550_id); static struct i2c_driver tsl2550_driver = { .driver = { .name = TSL2550_DRV_NAME, - .owner = THIS_MODULE, .pm = TSL2550_PM_OPS, }, .probe = tsl2550_probe, diff --git a/kernel/drivers/misc/vmw_balloon.c b/kernel/drivers/misc/vmw_balloon.c index 191617492..1e688bfec 100644 --- a/kernel/drivers/misc/vmw_balloon.c +++ b/kernel/drivers/misc/vmw_balloon.c @@ -1,7 +1,7 @@ /* * VMware Balloon driver. * - * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved. + * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -37,16 +37,19 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> #include <asm/hypervisor.h> MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); -MODULE_VERSION("1.2.1.3-k"); +MODULE_VERSION("1.5.0.0-k"); MODULE_ALIAS("dmi:*:svnVMware*:*"); MODULE_ALIAS("vmware_vmmemctl"); MODULE_LICENSE("GPL"); @@ -57,12 +60,6 @@ MODULE_LICENSE("GPL"); */ /* - * Rate of allocating memory when there is no memory pressure - * (driver performs non-sleeping allocations). - */ -#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U - -/* * Rates of memory allocaton when guest experiences memory pressure * (driver performs sleeping allocations). */ @@ -71,13 +68,6 @@ MODULE_LICENSE("GPL"); #define VMW_BALLOON_RATE_ALLOC_INC 16U /* - * Rates for releasing pages while deflating balloon. - */ -#define VMW_BALLOON_RATE_FREE_MIN 512U -#define VMW_BALLOON_RATE_FREE_MAX 16384U -#define VMW_BALLOON_RATE_FREE_INC 16U - -/* * When guest is under memory pressure, use a reduced page allocation * rate for next several cycles. */ @@ -85,7 +75,7 @@ MODULE_LICENSE("GPL"); /* * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't - * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use + * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use * __GFP_NOWARN, to suppress page allocation failure warnings. */ #define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) @@ -99,9 +89,6 @@ MODULE_LICENSE("GPL"); */ #define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) -/* Maximum number of page allocations without yielding processor */ -#define VMW_BALLOON_YIELD_THRESHOLD 1024 - /* Maximum number of refused pages we accumulate during inflation cycle */ #define VMW_BALLOON_MAX_REFUSED 16 @@ -110,68 +97,154 @@ MODULE_LICENSE("GPL"); */ #define VMW_BALLOON_HV_PORT 0x5670 #define VMW_BALLOON_HV_MAGIC 0x456c6d6f -#define VMW_BALLOON_PROTOCOL_VERSION 2 #define VMW_BALLOON_GUEST_ID 1 /* Linux */ -#define VMW_BALLOON_CMD_START 0 -#define VMW_BALLOON_CMD_GET_TARGET 1 -#define VMW_BALLOON_CMD_LOCK 2 -#define VMW_BALLOON_CMD_UNLOCK 3 -#define VMW_BALLOON_CMD_GUEST_ID 4 +enum vmwballoon_capabilities { + /* + * Bit 0 is reserved and not associated to any capability. + */ + VMW_BALLOON_BASIC_CMDS = (1 << 1), + VMW_BALLOON_BATCHED_CMDS = (1 << 2), + VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), + VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), +}; + +#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ + | VMW_BALLOON_BATCHED_CMDS \ + | VMW_BALLOON_BATCHED_2M_CMDS \ + | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) + +#define VMW_BALLOON_2M_SHIFT (9) +#define VMW_BALLOON_NUM_PAGE_SIZES (2) + +/* + * Backdoor commands availability: + * + * START, GET_TARGET and GUEST_ID are always available, + * + * VMW_BALLOON_BASIC_CMDS: + * LOCK and UNLOCK commands, + * VMW_BALLOON_BATCHED_CMDS: + * BATCHED_LOCK and BATCHED_UNLOCK commands. + * VMW BALLOON_BATCHED_2M_CMDS: + * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands, + * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD: + * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command. + */ +#define VMW_BALLOON_CMD_START 0 +#define VMW_BALLOON_CMD_GET_TARGET 1 +#define VMW_BALLOON_CMD_LOCK 2 +#define VMW_BALLOON_CMD_UNLOCK 3 +#define VMW_BALLOON_CMD_GUEST_ID 4 +#define VMW_BALLOON_CMD_BATCHED_LOCK 6 +#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7 +#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8 +#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9 +#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10 + /* error codes */ -#define VMW_BALLOON_SUCCESS 0 -#define VMW_BALLOON_FAILURE -1 -#define VMW_BALLOON_ERROR_CMD_INVALID 1 -#define VMW_BALLOON_ERROR_PPN_INVALID 2 -#define VMW_BALLOON_ERROR_PPN_LOCKED 3 -#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 -#define VMW_BALLOON_ERROR_PPN_PINNED 5 -#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 -#define VMW_BALLOON_ERROR_RESET 7 -#define VMW_BALLOON_ERROR_BUSY 8 - -#define VMWARE_BALLOON_CMD(cmd, data, result) \ -({ \ - unsigned long __stat, __dummy1, __dummy2; \ - __asm__ __volatile__ ("inl %%dx" : \ - "=a"(__stat), \ - "=c"(__dummy1), \ - "=d"(__dummy2), \ - "=b"(result) : \ - "0"(VMW_BALLOON_HV_MAGIC), \ - "1"(VMW_BALLOON_CMD_##cmd), \ - "2"(VMW_BALLOON_HV_PORT), \ - "3"(data) : \ - "memory"); \ - result &= -1UL; \ - __stat & -1UL; \ +#define VMW_BALLOON_SUCCESS 0 +#define VMW_BALLOON_FAILURE -1 +#define VMW_BALLOON_ERROR_CMD_INVALID 1 +#define VMW_BALLOON_ERROR_PPN_INVALID 2 +#define VMW_BALLOON_ERROR_PPN_LOCKED 3 +#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 +#define VMW_BALLOON_ERROR_PPN_PINNED 5 +#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 +#define VMW_BALLOON_ERROR_RESET 7 +#define VMW_BALLOON_ERROR_BUSY 8 + +#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) + +/* Batch page description */ + +/* + * Layout of a page in the batch page: + * + * +-------------+----------+--------+ + * | | | | + * | Page number | Reserved | Status | + * | | | | + * +-------------+----------+--------+ + * 64 PAGE_SHIFT 6 0 + * + * The reserved field should be set to 0. + */ +#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64)) +#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1) +#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1)) + +struct vmballoon_batch_page { + u64 pages[VMW_BALLOON_BATCH_MAX_PAGES]; +}; + +static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx) +{ + return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK; +} + +static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch, + int idx) +{ + return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK); +} + +static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx, + u64 pa) +{ + batch->pages[idx] = pa; +} + + +#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \ +({ \ + unsigned long __status, __dummy1, __dummy2, __dummy3; \ + __asm__ __volatile__ ("inl %%dx" : \ + "=a"(__status), \ + "=c"(__dummy1), \ + "=d"(__dummy2), \ + "=b"(result), \ + "=S" (__dummy3) : \ + "0"(VMW_BALLOON_HV_MAGIC), \ + "1"(VMW_BALLOON_CMD_##cmd), \ + "2"(VMW_BALLOON_HV_PORT), \ + "3"(arg1), \ + "4" (arg2) : \ + "memory"); \ + if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ + result = __dummy1; \ + result &= -1UL; \ + __status & -1UL; \ }) #ifdef CONFIG_DEBUG_FS struct vmballoon_stats { unsigned int timer; + unsigned int doorbell; /* allocation statistics */ - unsigned int alloc; - unsigned int alloc_fail; + unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES]; unsigned int sleep_alloc; unsigned int sleep_alloc_fail; - unsigned int refused_alloc; - unsigned int refused_free; - unsigned int free; + unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES]; /* monitor operations */ - unsigned int lock; - unsigned int lock_fail; - unsigned int unlock; - unsigned int unlock_fail; + unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; unsigned int target; unsigned int target_fail; unsigned int start; unsigned int start_fail; unsigned int guest_type; unsigned int guest_type_fail; + unsigned int doorbell_set; + unsigned int doorbell_unset; }; #define STATS_INC(stat) (stat)++ @@ -179,14 +252,30 @@ struct vmballoon_stats { #define STATS_INC(stat) #endif -struct vmballoon { +struct vmballoon; +struct vmballoon_ops { + void (*add_page)(struct vmballoon *b, int idx, struct page *p); + int (*lock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); + int (*unlock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); +}; + +struct vmballoon_page_size { /* list of reserved physical pages */ struct list_head pages; /* transient list of non-balloonable pages */ struct list_head refused_pages; unsigned int n_refused_pages; +}; + +struct vmballoon { + struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; + + /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */ + unsigned supported_page_sizes; /* balloon size in pages */ unsigned int size; @@ -197,11 +286,18 @@ struct vmballoon { /* adjustment rates (pages per second) */ unsigned int rate_alloc; - unsigned int rate_free; /* slowdown page allocations for next few cycles */ unsigned int slow_allocation_cycles; + unsigned long capabilities; + + struct vmballoon_batch_page *batch_page; + unsigned int batch_max_pages; + struct page *page; + + const struct vmballoon_ops *ops; + #ifdef CONFIG_DEBUG_FS /* statistics */ struct vmballoon_stats stats; @@ -213,6 +309,8 @@ struct vmballoon { struct sysinfo sysinfo; struct delayed_work dwork; + + struct vmci_handle vmci_doorbell; }; static struct vmballoon balloon; @@ -221,19 +319,38 @@ static struct vmballoon balloon; * Send "start" command to the host, communicating supported version * of the protocol. */ -static bool vmballoon_send_start(struct vmballoon *b) +static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) { - unsigned long status, dummy; + unsigned long status, capabilities, dummy = 0; + bool success; STATS_INC(b->stats.start); - status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy); - if (status == VMW_BALLOON_SUCCESS) - return true; + status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities); - pr_debug("%s - failed, hv returns %ld\n", __func__, status); - STATS_INC(b->stats.start_fail); - return false; + switch (status) { + case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: + b->capabilities = capabilities; + success = true; + break; + case VMW_BALLOON_SUCCESS: + b->capabilities = VMW_BALLOON_BASIC_CMDS; + success = true; + break; + default: + success = false; + } + + if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) + b->supported_page_sizes = 2; + else + b->supported_page_sizes = 1; + + if (!success) { + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.start_fail); + } + return success; } static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) @@ -259,9 +376,10 @@ static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) */ static bool vmballoon_send_guest_id(struct vmballoon *b) { - unsigned long status, dummy; + unsigned long status, dummy = 0; - status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy); + status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy, + dummy); STATS_INC(b->stats.guest_type); @@ -273,6 +391,14 @@ static bool vmballoon_send_guest_id(struct vmballoon *b) return false; } +static u16 vmballoon_page_size(bool is_2m_page) +{ + if (is_2m_page) + return 1 << VMW_BALLOON_2M_SHIFT; + + return 1; +} + /* * Retrieve desired balloon size from the host. */ @@ -281,6 +407,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) unsigned long status; unsigned long target; unsigned long limit; + unsigned long dummy = 0; u32 limit32; /* @@ -299,7 +426,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) /* update stats */ STATS_INC(b->stats.target); - status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target); + status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target); if (vmballoon_check_status(b, status)) { *new_target = target; return true; @@ -316,23 +443,46 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) * check the return value and maybe submit a different page. */ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, - unsigned int *hv_status) + unsigned int *hv_status, unsigned int *target) { - unsigned long status, dummy; + unsigned long status, dummy = 0; u32 pfn32; pfn32 = (u32)pfn; if (pfn32 != pfn) return -1; - STATS_INC(b->stats.lock); + STATS_INC(b->stats.lock[false]); - *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy); + *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target); if (vmballoon_check_status(b, status)) return 0; pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); - STATS_INC(b->stats.lock_fail); + STATS_INC(b->stats.lock_fail[false]); + return 1; +} + +static int vmballoon_send_batched_lock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = page_to_pfn(b->page); + + STATS_INC(b->stats.lock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return 0; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail[is_2m_pages]); return 1; } @@ -340,26 +490,66 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, * Notify the host that guest intends to release given page back into * the pool of available (to the guest) pages. */ -static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) +static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn, + unsigned int *target) { - unsigned long status, dummy; + unsigned long status, dummy = 0; u32 pfn32; pfn32 = (u32)pfn; if (pfn32 != pfn) return false; - STATS_INC(b->stats.unlock); + STATS_INC(b->stats.unlock[false]); - status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy); + status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target); if (vmballoon_check_status(b, status)) return true; pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); - STATS_INC(b->stats.unlock_fail); + STATS_INC(b->stats.unlock_fail[false]); + return false; +} + +static bool vmballoon_send_batched_unlock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = page_to_pfn(b->page); + + STATS_INC(b->stats.unlock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail[is_2m_pages]); return false; } +static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page) +{ + if (is_2m_page) + return alloc_pages(flags, VMW_BALLOON_2M_SHIFT); + + return alloc_page(flags); +} + +static void vmballoon_free_page(struct page *page, bool is_2m_page) +{ + if (is_2m_page) + __free_pages(page, VMW_BALLOON_2M_SHIFT); + else + __free_page(page); +} + /* * Quickly release all pages allocated for the balloon. This function is * called when host decides to "reset" balloon for one reason or another. @@ -369,91 +559,75 @@ static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) static void vmballoon_pop(struct vmballoon *b) { struct page *page, *next; - unsigned int count = 0; - - list_for_each_entry_safe(page, next, &b->pages, lru) { - list_del(&page->lru); - __free_page(page); - STATS_INC(b->stats.free); - b->size--; - - if (++count >= b->rate_free) { - count = 0; + unsigned is_2m_pages; + + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + list_del(&page->lru); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + b->size -= size_per_page; cond_resched(); } } -} -/* - * Perform standard reset sequence by popping the balloon (in case it - * is not empty) and then restarting protocol. This operation normally - * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. - */ -static void vmballoon_reset(struct vmballoon *b) -{ - /* free all pages, skipping monitor unlock */ - vmballoon_pop(b); + if (b->batch_page) { + vunmap(b->batch_page); + b->batch_page = NULL; + } - if (vmballoon_send_start(b)) { - b->reset_required = false; - if (!vmballoon_send_guest_id(b)) - pr_err("failed to send guest ID to the host\n"); + if (b->page) { + __free_page(b->page); + b->page = NULL; } } /* - * Allocate (or reserve) a page for the balloon and notify the host. If host - * refuses the page put it on "refuse" list and allocate another one until host - * is satisfied. "Refused" pages are released at the end of inflation cycle - * (when we allocate b->rate_alloc pages). + * Notify the host of a ballooned page. If host rejects the page put it on the + * refuse list, those refused page are then released at the end of the + * inflation cycle. */ -static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) +static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) { - struct page *page; - gfp_t flags; - unsigned int hv_status; - int locked; - flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP; - - do { - if (!can_sleep) - STATS_INC(b->stats.alloc); - else - STATS_INC(b->stats.sleep_alloc); + int locked, hv_status; + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; - page = alloc_page(flags); - if (!page) { - if (!can_sleep) - STATS_INC(b->stats.alloc_fail); - else - STATS_INC(b->stats.sleep_alloc_fail); - return -ENOMEM; - } + /* is_2m_pages can never happen as 2m pages support implies batching */ - /* inform monitor */ - locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status); - if (locked > 0) { - STATS_INC(b->stats.refused_alloc); + locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, + target); + if (locked > 0) { + STATS_INC(b->stats.refused_alloc[false]); - if (hv_status == VMW_BALLOON_ERROR_RESET || - hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { - __free_page(page); - return -EIO; - } + if (hv_status == VMW_BALLOON_ERROR_RESET || + hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { + vmballoon_free_page(page, false); + return -EIO; + } - /* - * Place page on the list of non-balloonable pages - * and retry allocation, unless we already accumulated - * too many of them, in which case take a breather. - */ - list_add(&page->lru, &b->refused_pages); - if (++b->n_refused_pages >= VMW_BALLOON_MAX_REFUSED) - return -EIO; + /* + * Place page on the list of non-balloonable pages + * and retry allocation, unless we already accumulated + * too many of them, in which case take a breather. + */ + if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { + page_size->n_refused_pages++; + list_add(&page->lru, &page_size->refused_pages); + } else { + vmballoon_free_page(page, false); } - } while (locked != 0); + return -EIO; + } /* track allocated page */ - list_add(&page->lru, &b->pages); + list_add(&page->lru, &page_size->pages); /* update balloon size */ b->size++; @@ -461,21 +635,81 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep) return 0; } +static int vmballoon_lock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + int locked, i; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, + target); + if (locked > 0) { + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + + vmballoon_free_page(p, is_2m_pages); + } + + return -EIO; + } + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + + switch (locked) { + case VMW_BALLOON_SUCCESS: + list_add(&p->lru, &page_size->pages); + b->size += size_per_page; + break; + case VMW_BALLOON_ERROR_PPN_PINNED: + case VMW_BALLOON_ERROR_PPN_INVALID: + if (page_size->n_refused_pages + < VMW_BALLOON_MAX_REFUSED) { + list_add(&p->lru, &page_size->refused_pages); + page_size->n_refused_pages++; + break; + } + /* Fallthrough */ + case VMW_BALLOON_ERROR_RESET: + case VMW_BALLOON_ERROR_PPN_NOTNEEDED: + vmballoon_free_page(p, is_2m_pages); + break; + default: + /* This should never happen */ + WARN_ON_ONCE(true); + } + } + + return 0; +} + /* * Release the page allocated for the balloon. Note that we first notify * the host so it can make sure the page will be available for the guest * to use, if needed. */ -static int vmballoon_release_page(struct vmballoon *b, struct page *page) +static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) { - if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) - return -EIO; + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; - list_del(&page->lru); + /* is_2m_pages can never happen as 2m pages support implies batching */ + + if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) { + list_add(&page->lru, &page_size->pages); + return -EIO; + } /* deallocate page */ - __free_page(page); - STATS_INC(b->stats.free); + vmballoon_free_page(page, false); + STATS_INC(b->stats.free[false]); /* update balloon size */ b->size--; @@ -483,21 +717,76 @@ static int vmballoon_release_page(struct vmballoon *b, struct page *page) return 0; } +static int vmballoon_unlock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, + unsigned int *target) +{ + int locked, i, ret = 0; + bool hv_success; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages, + target); + if (!hv_success) + ret = -EIO; + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + if (!hv_success || locked != VMW_BALLOON_SUCCESS) { + /* + * That page wasn't successfully unlocked by the + * hypervisor, re-add it to the list of pages owned by + * the balloon driver. + */ + list_add(&p->lru, &page_size->pages); + } else { + /* deallocate page */ + vmballoon_free_page(p, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + + /* update balloon size */ + b->size -= size_per_page; + } + } + + return ret; +} + /* * Release pages that were allocated while attempting to inflate the * balloon but were refused by the host for one reason or another. */ -static void vmballoon_release_refused_pages(struct vmballoon *b) +static void vmballoon_release_refused_pages(struct vmballoon *b, + bool is_2m_pages) { struct page *page, *next; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; - list_for_each_entry_safe(page, next, &b->refused_pages, lru) { + list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { list_del(&page->lru); - __free_page(page); - STATS_INC(b->stats.refused_free); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.refused_free[is_2m_pages]); } - b->n_refused_pages = 0; + page_size->n_refused_pages = 0; +} + +static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) +{ + b->page = p; +} + +static void vmballoon_add_batched_page(struct vmballoon *b, int idx, + struct page *p) +{ + vmballoon_batch_set_pa(b->batch_page, idx, + (u64)page_to_pfn(p) << PAGE_SHIFT); } /* @@ -507,12 +796,12 @@ static void vmballoon_release_refused_pages(struct vmballoon *b) */ static void vmballoon_inflate(struct vmballoon *b) { - unsigned int goal; - unsigned int rate; - unsigned int i; + unsigned rate; unsigned int allocations = 0; + unsigned int num_pages = 0; int error = 0; - bool alloc_can_sleep = false; + gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP; + bool is_2m_pages; pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); @@ -531,31 +820,51 @@ static void vmballoon_inflate(struct vmballoon *b) * slowdown page allocations considerably. */ - goal = b->target - b->size; /* * Start with no sleep allocation rate which may be higher * than sleeping allocation rate. */ - rate = b->slow_allocation_cycles ? - b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX; + if (b->slow_allocation_cycles) { + rate = b->rate_alloc; + is_2m_pages = false; + } else { + rate = UINT_MAX; + is_2m_pages = + b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; + } + + pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n", + __func__, b->target - b->size, rate, b->rate_alloc); + + while (!b->reset_required && + b->size + num_pages * vmballoon_page_size(is_2m_pages) + < b->target) { + struct page *page; - pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n", - __func__, goal, rate, b->rate_alloc); + if (flags == VMW_PAGE_ALLOC_NOSLEEP) + STATS_INC(b->stats.alloc[is_2m_pages]); + else + STATS_INC(b->stats.sleep_alloc); + + page = vmballoon_alloc_page(flags, is_2m_pages); + if (!page) { + STATS_INC(b->stats.alloc_fail[is_2m_pages]); - for (i = 0; i < goal; i++) { + if (is_2m_pages) { + b->ops->lock(b, num_pages, true, &b->target); - error = vmballoon_reserve_page(b, alloc_can_sleep); - if (error) { - if (error != -ENOMEM) { /* - * Not a page allocation failure, stop this - * cycle. Maybe we'll get new target from - * the host soon. + * ignore errors from locking as we now switch + * to 4k pages and we might get different + * errors. */ - break; + + num_pages = 0; + is_2m_pages = false; + continue; } - if (alloc_can_sleep) { + if (flags == VMW_PAGE_ALLOC_CANSLEEP) { /* * CANSLEEP page allocation failed, so guest * is under severe memory pressure. Quickly @@ -563,6 +872,7 @@ static void vmballoon_inflate(struct vmballoon *b) */ b->rate_alloc = max(b->rate_alloc / 2, VMW_BALLOON_RATE_ALLOC_MIN); + STATS_INC(b->stats.sleep_alloc_fail); break; } @@ -576,38 +886,49 @@ static void vmballoon_inflate(struct vmballoon *b) */ b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES; - if (i >= b->rate_alloc) + if (allocations >= b->rate_alloc) break; - alloc_can_sleep = true; + flags = VMW_PAGE_ALLOC_CANSLEEP; /* Lower rate for sleeping allocations. */ rate = b->rate_alloc; + continue; } - if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) { - cond_resched(); - allocations = 0; + b->ops->add_page(b, num_pages++, page); + if (num_pages == b->batch_max_pages) { + error = b->ops->lock(b, num_pages, is_2m_pages, + &b->target); + num_pages = 0; + if (error) + break; } - if (i >= rate) { + cond_resched(); + + if (allocations >= rate) { /* We allocated enough pages, let's take a break. */ break; } } + if (num_pages > 0) + b->ops->lock(b, num_pages, is_2m_pages, &b->target); + /* * We reached our goal without failures so try increasing * allocation rate. */ - if (error == 0 && i >= b->rate_alloc) { - unsigned int mult = i / b->rate_alloc; + if (error == 0 && allocations >= b->rate_alloc) { + unsigned int mult = allocations / b->rate_alloc; b->rate_alloc = min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC, VMW_BALLOON_RATE_ALLOC_MAX); } - vmballoon_release_refused_pages(b); + vmballoon_release_refused_pages(b, true); + vmballoon_release_refused_pages(b, false); } /* @@ -615,35 +936,176 @@ static void vmballoon_inflate(struct vmballoon *b) */ static void vmballoon_deflate(struct vmballoon *b) { - struct page *page, *next; - unsigned int i = 0; - unsigned int goal; - int error; + unsigned is_2m_pages; pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); - /* limit deallocation rate */ - goal = min(b->size - b->target, b->rate_free); + /* free pages to reach target */ + for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; + is_2m_pages++) { + struct page *page, *next; + unsigned int num_pages = 0; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + if (b->reset_required || + (b->target > 0 && + b->size - num_pages + * vmballoon_page_size(is_2m_pages) + < b->target + vmballoon_page_size(true))) + break; - pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free); + list_del(&page->lru); + b->ops->add_page(b, num_pages++, page); - /* free pages to reach target */ - list_for_each_entry_safe(page, next, &b->pages, lru) { - error = vmballoon_release_page(b, page); - if (error) { - /* quickly decrease rate in case of error */ - b->rate_free = max(b->rate_free / 2, - VMW_BALLOON_RATE_FREE_MIN); - return; + if (num_pages == b->batch_max_pages) { + int error; + + error = b->ops->unlock(b, num_pages, + is_2m_pages, &b->target); + num_pages = 0; + if (error) + return; + } + + cond_resched(); } - if (++i >= goal) - break; + if (num_pages > 0) + b->ops->unlock(b, num_pages, is_2m_pages, &b->target); + } +} + +static const struct vmballoon_ops vmballoon_basic_ops = { + .add_page = vmballoon_add_page, + .lock = vmballoon_lock_page, + .unlock = vmballoon_unlock_page +}; + +static const struct vmballoon_ops vmballoon_batched_ops = { + .add_page = vmballoon_add_batched_page, + .lock = vmballoon_lock_batched_page, + .unlock = vmballoon_unlock_batched_page +}; + +static bool vmballoon_init_batching(struct vmballoon *b) +{ + b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP); + if (!b->page) + return false; + + b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL); + if (!b->batch_page) { + __free_page(b->page); + return false; + } + + return true; +} + +/* + * Receive notification and resize balloon + */ +static void vmballoon_doorbell(void *client_data) +{ + struct vmballoon *b = client_data; + + STATS_INC(b->stats.doorbell); + + mod_delayed_work(system_freezable_wq, &b->dwork, 0); +} + +/* + * Clean up vmci doorbell + */ +static void vmballoon_vmci_cleanup(struct vmballoon *b) +{ + int error; + + VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID, + VMCI_INVALID_ID, error); + STATS_INC(b->stats.doorbell_unset); + + if (!vmci_handle_is_invalid(b->vmci_doorbell)) { + vmci_doorbell_destroy(b->vmci_doorbell); + b->vmci_doorbell = VMCI_INVALID_HANDLE; + } +} + +/* + * Initialize vmci doorbell, to get notified as soon as balloon changes + */ +static int vmballoon_vmci_init(struct vmballoon *b) +{ + int error = 0; + + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) { + error = vmci_doorbell_create(&b->vmci_doorbell, + VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); + + if (error == VMCI_SUCCESS) { + VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, + b->vmci_doorbell.context, + b->vmci_doorbell.resource, error); + STATS_INC(b->stats.doorbell_set); + } + } + + if (error != 0) { + vmballoon_vmci_cleanup(b); + + return -EIO; } - /* slowly increase rate if there were no errors */ - b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC, - VMW_BALLOON_RATE_FREE_MAX); + return 0; +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. + */ +static void vmballoon_reset(struct vmballoon *b) +{ + int error; + + vmballoon_vmci_cleanup(b); + + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) + return; + + if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { + b->ops = &vmballoon_batched_ops; + b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES; + if (!vmballoon_init_batching(b)) { + /* + * We failed to initialize batching, inform the monitor + * about it by sending a null capability. + * + * The guest will retry in one second. + */ + vmballoon_send_start(b, 0); + return; + } + } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { + b->ops = &vmballoon_basic_ops; + b->batch_max_pages = 1; + } + + b->reset_required = false; + + error = vmballoon_vmci_init(b); + if (error) + pr_err("failed to initialize vmci doorbell\n"); + + if (!vmballoon_send_guest_id(b)) + pr_err("failed to send guest ID to the host\n"); } /* @@ -664,13 +1126,14 @@ static void vmballoon_work(struct work_struct *work) if (b->slow_allocation_cycles > 0) b->slow_allocation_cycles--; - if (vmballoon_send_get_target(b, &target)) { + if (!b->reset_required && vmballoon_send_get_target(b, &target)) { /* update target, adjust size */ b->target = target; if (b->size < target) vmballoon_inflate(b); - else if (b->size > target) + else if (target == 0 || + b->size > target + vmballoon_page_size(true)) vmballoon_deflate(b); } @@ -692,6 +1155,14 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset) struct vmballoon *b = f->private; struct vmballoon_stats *stats = &b->stats; + /* format capabilities info */ + seq_printf(f, + "balloon capabilities: %#4x\n" + "used capabilities: %#4lx\n" + "is resetting: %c\n", + VMW_BALLOON_CAPABILITIES, b->capabilities, + b->reset_required ? 'y' : 'n'); + /* format size info */ seq_printf(f, "target: %8d pages\n" @@ -700,35 +1171,48 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset) /* format rate info */ seq_printf(f, - "rateNoSleepAlloc: %8d pages/sec\n" - "rateSleepAlloc: %8d pages/sec\n" - "rateFree: %8d pages/sec\n", - VMW_BALLOON_NOSLEEP_ALLOC_MAX, - b->rate_alloc, b->rate_free); + "rateSleepAlloc: %8d pages/sec\n", + b->rate_alloc); seq_printf(f, "\n" "timer: %8u\n" + "doorbell: %8u\n" "start: %8u (%4u failed)\n" "guestType: %8u (%4u failed)\n" + "2m-lock: %8u (%4u failed)\n" "lock: %8u (%4u failed)\n" + "2m-unlock: %8u (%4u failed)\n" "unlock: %8u (%4u failed)\n" "target: %8u (%4u failed)\n" + "prim2mAlloc: %8u (%4u failed)\n" "primNoSleepAlloc: %8u (%4u failed)\n" "primCanSleepAlloc: %8u (%4u failed)\n" + "prim2mFree: %8u\n" "primFree: %8u\n" + "err2mAlloc: %8u\n" "errAlloc: %8u\n" - "errFree: %8u\n", + "err2mFree: %8u\n" + "errFree: %8u\n" + "doorbellSet: %8u\n" + "doorbellUnset: %8u\n", stats->timer, + stats->doorbell, stats->start, stats->start_fail, stats->guest_type, stats->guest_type_fail, - stats->lock, stats->lock_fail, - stats->unlock, stats->unlock_fail, + stats->lock[true], stats->lock_fail[true], + stats->lock[false], stats->lock_fail[false], + stats->unlock[true], stats->unlock_fail[true], + stats->unlock[false], stats->unlock_fail[false], stats->target, stats->target_fail, - stats->alloc, stats->alloc_fail, + stats->alloc[true], stats->alloc_fail[true], + stats->alloc[false], stats->alloc_fail[false], stats->sleep_alloc, stats->sleep_alloc_fail, - stats->free, - stats->refused_alloc, stats->refused_free); + stats->free[true], + stats->free[false], + stats->refused_alloc[true], stats->refused_alloc[false], + stats->refused_free[true], stats->refused_free[false], + stats->doorbell_set, stats->doorbell_unset); return 0; } @@ -782,7 +1266,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) static int __init vmballoon_init(void) { int error; - + unsigned is_2m_pages; /* * Check if we are running on VMware's hypervisor and bail out * if we are not. @@ -790,32 +1274,26 @@ static int __init vmballoon_init(void) if (x86_hyper != &x86_hyper_vmware) return -ENODEV; - INIT_LIST_HEAD(&balloon.pages); - INIT_LIST_HEAD(&balloon.refused_pages); + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages); + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages); + } /* initialize rates */ balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX; - balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX; INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); - /* - * Start balloon. - */ - if (!vmballoon_send_start(&balloon)) { - pr_err("failed to send start command to the host\n"); - return -EIO; - } - - if (!vmballoon_send_guest_id(&balloon)) { - pr_err("failed to send guest ID to the host\n"); - return -EIO; - } - error = vmballoon_debugfs_init(&balloon); if (error) return error; + balloon.vmci_doorbell = VMCI_INVALID_HANDLE; + balloon.batch_page = NULL; + balloon.page = NULL; + balloon.reset_required = true; + queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); return 0; @@ -824,6 +1302,7 @@ module_init(vmballoon_init); static void __exit vmballoon_exit(void) { + vmballoon_vmci_cleanup(&balloon); cancel_delayed_work_sync(&balloon.dwork); vmballoon_debugfs_exit(&balloon); @@ -833,7 +1312,7 @@ static void __exit vmballoon_exit(void) * Reset connection before deallocating memory to avoid potential for * additional spurious resets from guest touching deallocated pages. */ - vmballoon_send_start(&balloon); + vmballoon_send_start(&balloon, 0); vmballoon_pop(&balloon); } module_exit(vmballoon_exit); diff --git a/kernel/drivers/misc/vmw_vmci/vmci_datagram.c b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c index 822665245..8a4b6bbe1 100644 --- a/kernel/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c @@ -276,11 +276,10 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) } /* We make a copy to enqueue. */ - new_dg = kmalloc(dg_size, GFP_KERNEL); + new_dg = kmemdup(dg, dg_size, GFP_KERNEL); if (new_dg == NULL) return VMCI_ERROR_NO_MEM; - memcpy(new_dg, dg, dg_size); retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); if (retval < VMCI_SUCCESS) { kfree(new_dg); diff --git a/kernel/drivers/misc/vmw_vmci/vmci_host.c b/kernel/drivers/misc/vmw_vmci/vmci_host.c index a721b5d8a..9ec262a52 100644 --- a/kernel/drivers/misc/vmw_vmci/vmci_host.c +++ b/kernel/drivers/misc/vmw_vmci/vmci_host.c @@ -1031,14 +1031,9 @@ int __init vmci_host_init(void) void __exit vmci_host_exit(void) { - int error; - vmci_host_device_initialized = false; - error = misc_deregister(&vmci_host_miscdev); - if (error) - pr_warn("Error unregistering character device: %d\n", error); - + misc_deregister(&vmci_host_miscdev); vmci_ctx_destroy(host_context); vmci_qp_broker_exit(); |