These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and the rt patch from the rt wiki download page.

During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic (I70131fb85). The collisions were removed because its logic was already present in the source.

Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
author: José Pekkarinen <jose.pekkarinen@nokia.com> 2016-04-11 10:41:07 +0300
committer: José Pekkarinen <jose.pekkarinen@nokia.com> 2016-04-13 08:17:18 +0300
commit: e09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
tree: d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/misc
parent: f93b97fd65072de626c074dbe099a1fff05ce060 (diff)
151 files changed, 21335 insertions, 6610 deletions
diff --git a/kernel/drivers/misc/Kconfig b/kernel/drivers/misc/Kconfig
index 453a61660..3540f684f 100644
--- a/kernel/drivers/misc/Kconfig
+++ b/kernel/drivers/misc/Kconfig
@@ -309,6 +309,16 @@ config HP_ILO
 	  To compile this driver as a module, choose M here: the
 	  module will be called hpilo.
 
+config QCOM_COINCELL
+	tristate "Qualcomm coincell charger support"
+	depends on MFD_SPMI_PMIC || COMPILE_TEST
+	help
+	  This driver supports the coincell block found inside of
+	  Qualcomm PMICs.  The coincell charger provides a means to
+	  charge a coincell battery or backup capacitor which is used
+	  to maintain PMIC register and RTC state in the absence of
+	  external power.
+
 config SGI_GRU
 	tristate "SGI GRU driver"
 	depends on X86_UV && SMP
@@ -442,7 +452,7 @@ config TI_DAC7512
 
 config VMWARE_BALLOON
 	tristate "VMware Balloon Driver"
-	depends on X86 && HYPERVISOR_GUEST
+	depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
 	help
 	  This is VMware physical memory management driver which acts
 	  like a "balloon" that can be inflated to reclaim physical pages
@@ -558,7 +568,6 @@ source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
 source "drivers/misc/ti-st/Kconfig"
 source "drivers/misc/lis3lv02d/Kconfig"
-source "drivers/misc/carma/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
diff --git a/kernel/drivers/misc/Makefile b/kernel/drivers/misc/Makefile
index 6a8e39388..ec4aecba0 100644
--- a/kernel/drivers/misc/Makefile
+++ b/kernel/drivers/misc/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_LKDTM)		+= lkdtm.o
 obj-$(CONFIG_TIFM_CORE)       	+= tifm_core.o
 obj-$(CONFIG_TIFM_7XX1)       	+= tifm_7xx1.o
 obj-$(CONFIG_PHANTOM)		+= phantom.o
+obj-$(CONFIG_QCOM_COINCELL)	+= qcom-coincell.o
 obj-$(CONFIG_SENSORS_BH1780)	+= bh1780gli.o
 obj-$(CONFIG_SENSORS_BH1770)	+= bh1770glc.o
 obj-$(CONFIG_SENSORS_APDS990X)	+= apds990x.o
@@ -45,7 +46,6 @@ obj-$(CONFIG_ARM_CHARLCD)	+= arm-charlcd.o
 obj-$(CONFIG_PCH_PHUB)		+= pch_phub.o
 obj-y				+= ti-st/
 obj-y				+= lis3lv02d/
-obj-y				+= carma/
 obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
 obj-$(CONFIG_ALTERA_STAPL)	+=altera-stapl/
 obj-$(CONFIG_INTEL_MEI)		+= mei/
diff --git a/kernel/drivers/misc/ad525x_dpot-i2c.c b/kernel/drivers/misc/ad525x_dpot-i2c.c
index 705b881e1..4f832002d 100644
--- a/kernel/drivers/misc/ad525x_dpot-i2c.c
+++ b/kernel/drivers/misc/ad525x_dpot-i2c.c
@@ -106,7 +106,6 @@ MODULE_DEVICE_TABLE(i2c, ad_dpot_id);
 static struct i2c_driver ad_dpot_i2c_driver = {
 	.driver = {
 		.name	= "ad_dpot",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= ad_dpot_i2c_probe,
 	.remove		= ad_dpot_i2c_remove,
@@ -118,4 +117,3 @@ module_i2c_driver(ad_dpot_i2c_driver);
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("i2c:ad_dpot");
diff --git a/kernel/drivers/misc/ad525x_dpot-spi.c b/kernel/drivers/misc/ad525x_dpot-spi.c
index f4c82eafa..39a7f517e 100644
--- a/kernel/drivers/misc/ad525x_dpot-spi.c
+++ b/kernel/drivers/misc/ad525x_dpot-spi.c
@@ -132,7 +132,6 @@ MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id);
 static struct spi_driver ad_dpot_spi_driver = {
 	.driver = {
 		.name	= "ad_dpot",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= ad_dpot_spi_probe,
 	.remove		= ad_dpot_spi_remove,
diff --git a/kernel/drivers/misc/altera-stapl/altera.c b/kernel/drivers/misc/altera-stapl/altera.c
index bca2630d0..f53e217e9 100644
--- a/kernel/drivers/misc/altera-stapl/altera.c
+++ b/kernel/drivers/misc/altera-stapl/altera.c
@@ -2451,7 +2451,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw)
 
 	astate->config = config;
 	if (!astate->config->jtag_io) {
-		dprintk(KERN_INFO "%s: using byteblaster!\n", __func__);
+		dprintk("%s: using byteblaster!\n", __func__);
 		astate->config->jtag_io = netup_jtag_io_lpt;
 	}
 
diff --git a/kernel/drivers/misc/apds990x.c b/kernel/drivers/misc/apds990x.c
index 3739ffa9c..a3e789b85 100644
--- a/kernel/drivers/misc/apds990x.c
+++ b/kernel/drivers/misc/apds990x.c
@@ -1275,7 +1275,6 @@ static const struct dev_pm_ops apds990x_pm_ops = {
 static struct i2c_driver apds990x_driver = {
 	.driver	 = {
 		.name	= "apds990x",
-		.owner	= THIS_MODULE,
 		.pm	= &apds990x_pm_ops,
 	},
 	.probe	  = apds990x_probe,
diff --git a/kernel/drivers/misc/atmel_tclib.c b/kernel/drivers/misc/atmel_tclib.c
index 0ca05c3ec..ac24a4bd6 100644
--- a/kernel/drivers/misc/atmel_tclib.c
+++ b/kernel/drivers/misc/atmel_tclib.c
@@ -125,6 +125,10 @@ static int __init tc_probe(struct platform_device *pdev)
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
+	if (IS_ERR(tc->slow_clk))
+		return PTR_ERR(tc->slow_clk);
+
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tc->regs = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(tc->regs))
diff --git a/kernel/drivers/misc/bh1770glc.c b/kernel/drivers/misc/bh1770glc.c
index b756381b8..753d7ecda 100644
--- a/kernel/drivers/misc/bh1770glc.c
+++ b/kernel/drivers/misc/bh1770glc.c
@@ -1396,7 +1396,6 @@ static const struct dev_pm_ops bh1770_pm_ops = {
 static struct i2c_driver bh1770_driver = {
 	.driver	 = {
 		.name	= "bh1770glc",
-		.owner	= THIS_MODULE,
 		.pm	= &bh1770_pm_ops,
 	},
 	.probe	  = bh1770_probe,
diff --git a/kernel/drivers/misc/bmp085-i2c.c b/kernel/drivers/misc/bmp085-i2c.c
index a7c16295b..f35c218aa 100644
--- a/kernel/drivers/misc/bmp085-i2c.c
+++ b/kernel/drivers/misc/bmp085-i2c.c
@@ -66,7 +66,6 @@ MODULE_DEVICE_TABLE(i2c, bmp085_id);
 
 static struct i2c_driver bmp085_i2c_driver = {
 	.driver = {
-		.owner	= THIS_MODULE,
 		.name	= BMP085_NAME,
 	},
 	.id_table	= bmp085_id,
diff --git a/kernel/drivers/misc/bmp085-spi.c b/kernel/drivers/misc/bmp085-spi.c
index 864ecac32..17ecbf95f 100644
--- a/kernel/drivers/misc/bmp085-spi.c
+++ b/kernel/drivers/misc/bmp085-spi.c
@@ -64,7 +64,6 @@ MODULE_DEVICE_TABLE(spi, bmp085_id);
 
 static struct spi_driver bmp085_spi_driver = {
 	.driver = {
-		.owner	= THIS_MODULE,
 		.name	= BMP085_NAME,
 		.of_match_table = bmp085_of_match
 	},
diff --git a/kernel/drivers/misc/c2port/core.c b/kernel/drivers/misc/c2port/core.c
index 464419b36..cc8645b53 100644
--- a/kernel/drivers/misc/c2port/core.c
+++ b/kernel/drivers/misc/c2port/core.c
@@ -926,7 +926,7 @@ struct c2port_device *c2port_device_register(char *name,
 
 	c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
 				   "c2port%d", c2dev->id);
-	if (unlikely(IS_ERR(c2dev->dev))) {
+	if (IS_ERR(c2dev->dev)) {
 		ret = PTR_ERR(c2dev->dev);
 		goto error_device_create;
 	}
diff --git a/kernel/drivers/misc/carma/Kconfig b/kernel/drivers/misc/carma/Kconfig
deleted file mode 100644
index 295882bfb..000000000
--- a/kernel/drivers/misc/carma/Kconfig
+++ /dev/null
@@ -1,15 +0,0 @@
-config CARMA_FPGA
-	tristate "CARMA DATA-FPGA Access Driver"
-	depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA
-	default n
-	help
-	  Say Y here to include support for communicating with the data
-	  processing FPGAs on the OVRO CARMA board.
-
-config CARMA_FPGA_PROGRAM
-	tristate "CARMA DATA-FPGA Programmer"
-	depends on FSL_SOC && PPC_83xx && HAS_DMA && FSL_DMA
-	default n
-	help
-	  Say Y here to include support for programming the data processing
-	  FPGAs on the OVRO CARMA board.
diff --git a/kernel/drivers/misc/carma/Makefile b/kernel/drivers/misc/carma/Makefile
deleted file mode 100644
index ff36ac2ce..000000000
--- a/kernel/drivers/misc/carma/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_CARMA_FPGA)		+= carma-fpga.o
-obj-$(CONFIG_CARMA_FPGA_PROGRAM)	+= carma-fpga-program.o
diff --git a/kernel/drivers/misc/carma/carma-fpga-program.c b/kernel/drivers/misc/carma/carma-fpga-program.c
deleted file mode 100644
index 0b1bd85e4..000000000
--- a/kernel/drivers/misc/carma/carma-fpga-program.c
+++ /dev/null
@@ -1,1182 +0,0 @@
-/*
- * CARMA Board DATA-FPGA Programmer
- *
- * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#include <linux/completion.h>
-#include <linux/miscdevice.h>
-#include <linux/dmaengine.h>
-#include <linux/fsldma.h>
-#include <linux/interrupt.h>
-#include <linux/highmem.h>
-#include <linux/vmalloc.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/leds.h>
-#include <linux/slab.h>
-#include <linux/kref.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-
-/* MPC8349EMDS specific get_immrbase() */
-#include <sysdev/fsl_soc.h>
-
-static const char drv_name[] = "carma-fpga-program";
-
-/*
- * Firmware images are always this exact size
- *
- * 12849552 bytes for a CARMA Digitizer Board (EP2S90 FPGAs)
- * 18662880 bytes for a CARMA Correlator Board (EP2S130 FPGAs)
- */
-#define FW_SIZE_EP2S90		12849552
-#define FW_SIZE_EP2S130		18662880
-
-struct fpga_dev {
-	struct miscdevice miscdev;
-
-	/* Reference count */
-	struct kref ref;
-
-	/* Device Registers */
-	struct device *dev;
-	void __iomem *regs;
-	void __iomem *immr;
-
-	/* Freescale DMA Device */
-	struct dma_chan *chan;
-
-	/* Interrupts */
-	int irq, status;
-	struct completion completion;
-
-	/* FPGA Bitfile */
-	struct mutex lock;
-
-	void *vaddr;
-	struct scatterlist *sglist;
-	int sglen;
-	int nr_pages;
-	bool buf_allocated;
-
-	/* max size and written bytes */
-	size_t fw_size;
-	size_t bytes;
-};
-
-static int fpga_dma_init(struct fpga_dev *priv, int nr_pages)
-{
-	struct page *pg;
-	int i;
-
-	priv->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT);
-	if (NULL == priv->vaddr) {
-		pr_debug("vmalloc_32(%d pages) failed\n", nr_pages);
-		return -ENOMEM;
-	}
-
-	pr_debug("vmalloc is at addr 0x%08lx, size=%d\n",
-				(unsigned long)priv->vaddr,
-				nr_pages << PAGE_SHIFT);
-
-	memset(priv->vaddr, 0, nr_pages << PAGE_SHIFT);
-	priv->nr_pages = nr_pages;
-
-	priv->sglist = vzalloc(priv->nr_pages * sizeof(*priv->sglist));
-	if (NULL == priv->sglist)
-		goto vzalloc_err;
-
-	sg_init_table(priv->sglist, priv->nr_pages);
-	for (i = 0; i < priv->nr_pages; i++) {
-		pg = vmalloc_to_page(priv->vaddr + i * PAGE_SIZE);
-		if (NULL == pg)
-			goto vmalloc_to_page_err;
-		sg_set_page(&priv->sglist[i], pg, PAGE_SIZE, 0);
-	}
-	return 0;
-
-vmalloc_to_page_err:
-	vfree(priv->sglist);
-	priv->sglist = NULL;
-vzalloc_err:
-	vfree(priv->vaddr);
-	priv->vaddr = NULL;
-	return -ENOMEM;
-}
-
-static int fpga_dma_map(struct fpga_dev *priv)
-{
-	priv->sglen = dma_map_sg(priv->dev, priv->sglist,
-			priv->nr_pages, DMA_TO_DEVICE);
-
-	if (0 == priv->sglen) {
-		pr_warn("%s: dma_map_sg failed\n", __func__);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-static int fpga_dma_unmap(struct fpga_dev *priv)
-{
-	if (!priv->sglen)
-		return 0;
-
-	dma_unmap_sg(priv->dev, priv->sglist, priv->sglen, DMA_TO_DEVICE);
-	priv->sglen = 0;
-	return 0;
-}
-
-/*
- * FPGA Bitfile Helpers
- */
-
-/**
- * fpga_drop_firmware_data() - drop the bitfile image from memory
- * @priv: the driver's private data structure
- *
- * LOCKING: must hold priv->lock
- */
-static void fpga_drop_firmware_data(struct fpga_dev *priv)
-{
-	vfree(priv->sglist);
-	vfree(priv->vaddr);
-	priv->buf_allocated = false;
-	priv->bytes = 0;
-}
-
-/*
- * Private Data Reference Count
- */
-
-static void fpga_dev_remove(struct kref *ref)
-{
-	struct fpga_dev *priv = container_of(ref, struct fpga_dev, ref);
-
-	/* free any firmware image that was not programmed */
-	fpga_drop_firmware_data(priv);
-
-	mutex_destroy(&priv->lock);
-	kfree(priv);
-}
-
-/*
- * LED Trigger (could be a seperate module)
- */
-
-/*
- * NOTE: this whole thing does have the problem that whenever the led's are
- * NOTE: first set to use the fpga trigger, they could be in the wrong state
- */
-
-DEFINE_LED_TRIGGER(ledtrig_fpga);
-
-static void ledtrig_fpga_programmed(bool enabled)
-{
-	if (enabled)
-		led_trigger_event(ledtrig_fpga, LED_FULL);
-	else
-		led_trigger_event(ledtrig_fpga, LED_OFF);
-}
-
-/*
- * FPGA Register Helpers
- */
-
-/* Register Definitions */
-#define FPGA_CONFIG_CONTROL		0x40
-#define FPGA_CONFIG_STATUS		0x44
-#define FPGA_CONFIG_FIFO_SIZE		0x48
-#define FPGA_CONFIG_FIFO_USED		0x4C
-#define FPGA_CONFIG_TOTAL_BYTE_COUNT	0x50
-#define FPGA_CONFIG_CUR_BYTE_COUNT	0x54
-
-#define FPGA_FIFO_ADDRESS		0x3000
-
-static int fpga_fifo_size(void __iomem *regs)
-{
-	return ioread32be(regs + FPGA_CONFIG_FIFO_SIZE);
-}
-
-#define CFG_STATUS_ERR_MASK	0xfffe
-
-static int fpga_config_error(void __iomem *regs)
-{
-	return ioread32be(regs + FPGA_CONFIG_STATUS) & CFG_STATUS_ERR_MASK;
-}
-
-static int fpga_fifo_empty(void __iomem *regs)
-{
-	return ioread32be(regs + FPGA_CONFIG_FIFO_USED) == 0;
-}
-
-static void fpga_fifo_write(void __iomem *regs, u32 val)
-{
-	iowrite32be(val, regs + FPGA_FIFO_ADDRESS);
-}
-
-static void fpga_set_byte_count(void __iomem *regs, u32 count)
-{
-	iowrite32be(count, regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
-}
-
-#define CFG_CTL_ENABLE	(1 << 0)
-#define CFG_CTL_RESET	(1 << 1)
-#define CFG_CTL_DMA	(1 << 2)
-
-static void fpga_programmer_enable(struct fpga_dev *priv, bool dma)
-{
-	u32 val;
-
-	val = (dma) ? (CFG_CTL_ENABLE | CFG_CTL_DMA) : CFG_CTL_ENABLE;
-	iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL);
-}
-
-static void fpga_programmer_disable(struct fpga_dev *priv)
-{
-	iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL);
-}
-
-static void fpga_dump_registers(struct fpga_dev *priv)
-{
-	u32 control, status, size, used, total, curr;
-
-	/* good status: do nothing */
-	if (priv->status == 0)
-		return;
-
-	/* Dump all status registers */
-	control = ioread32be(priv->regs + FPGA_CONFIG_CONTROL);
-	status = ioread32be(priv->regs + FPGA_CONFIG_STATUS);
-	size = ioread32be(priv->regs + FPGA_CONFIG_FIFO_SIZE);
-	used = ioread32be(priv->regs + FPGA_CONFIG_FIFO_USED);
-	total = ioread32be(priv->regs + FPGA_CONFIG_TOTAL_BYTE_COUNT);
-	curr = ioread32be(priv->regs + FPGA_CONFIG_CUR_BYTE_COUNT);
-
-	dev_err(priv->dev, "Configuration failed, dumping status registers\n");
-	dev_err(priv->dev, "Control:    0x%.8x\n", control);
-	dev_err(priv->dev, "Status:     0x%.8x\n", status);
-	dev_err(priv->dev, "FIFO Size:  0x%.8x\n", size);
-	dev_err(priv->dev, "FIFO Used:  0x%.8x\n", used);
-	dev_err(priv->dev, "FIFO Total: 0x%.8x\n", total);
-	dev_err(priv->dev, "FIFO Curr:  0x%.8x\n", curr);
-}
-
-/*
- * FPGA Power Supply Code
- */
-
-#define CTL_PWR_CONTROL		0x2006
-#define CTL_PWR_STATUS		0x200A
-#define CTL_PWR_FAIL		0x200B
-
-#define PWR_CONTROL_ENABLE	0x01
-
-#define PWR_STATUS_ERROR_MASK	0x10
-#define PWR_STATUS_GOOD		0x0f
-
-/*
- * Determine if the FPGA power is good for all supplies
- */
-static bool fpga_power_good(struct fpga_dev *priv)
-{
-	u8 val;
-
-	val = ioread8(priv->regs + CTL_PWR_STATUS);
-	if (val & PWR_STATUS_ERROR_MASK)
-		return false;
-
-	return val == PWR_STATUS_GOOD;
-}
-
-/*
- * Disable the FPGA power supplies
- */
-static void fpga_disable_power_supplies(struct fpga_dev *priv)
-{
-	unsigned long start;
-	u8 val;
-
-	iowrite8(0x0, priv->regs + CTL_PWR_CONTROL);
-
-	/*
-	 * Wait 500ms for the power rails to discharge
-	 *
-	 * Without this delay, the CTL-CPLD state machine can get into a
-	 * state where it is waiting for the power-goods to assert, but they
-	 * never do. This only happens when enabling and disabling the
-	 * power sequencer very rapidly.
-	 *
-	 * The loop below will also wait for the power goods to de-assert,
-	 * but testing has shown that they are always disabled by the time
-	 * the sleep completes. However, omitting the sleep and only waiting
-	 * for the power-goods to de-assert was not sufficient to ensure
-	 * that the power sequencer would not wedge itself.
-	 */
-	msleep(500);
-
-	start = jiffies;
-	while (time_before(jiffies, start + HZ)) {
-		val = ioread8(priv->regs + CTL_PWR_STATUS);
-		if (!(val & PWR_STATUS_GOOD))
-			break;
-
-		usleep_range(5000, 10000);
-	}
-
-	val = ioread8(priv->regs + CTL_PWR_STATUS);
-	if (val & PWR_STATUS_GOOD) {
-		dev_err(priv->dev, "power disable failed: "
-				   "power goods: status 0x%.2x\n", val);
-	}
-
-	if (val & PWR_STATUS_ERROR_MASK) {
-		dev_err(priv->dev, "power disable failed: "
-				   "alarm bit set: status 0x%.2x\n", val);
-	}
-}
-
-/**
- * fpga_enable_power_supplies() - enable the DATA-FPGA power supplies
- * @priv: the driver's private data structure
- *
- * Enable the DATA-FPGA power supplies, waiting up to 1 second for
- * them to enable successfully.
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int fpga_enable_power_supplies(struct fpga_dev *priv)
-{
-	unsigned long start = jiffies;
-
-	if (fpga_power_good(priv)) {
-		dev_dbg(priv->dev, "power was already good\n");
-		return 0;
-	}
-
-	iowrite8(PWR_CONTROL_ENABLE, priv->regs + CTL_PWR_CONTROL);
-	while (time_before(jiffies, start + HZ)) {
-		if (fpga_power_good(priv))
-			return 0;
-
-		usleep_range(5000, 10000);
-	}
-
-	return fpga_power_good(priv) ? 0 : -ETIMEDOUT;
-}
-
-/*
- * Determine if the FPGA power supplies are all enabled
- */
-static bool fpga_power_enabled(struct fpga_dev *priv)
-{
-	u8 val;
-
-	val = ioread8(priv->regs + CTL_PWR_CONTROL);
-	if (val & PWR_CONTROL_ENABLE)
-		return true;
-
-	return false;
-}
-
-/*
- * Determine if the FPGA's are programmed and running correctly
- */
-static bool fpga_running(struct fpga_dev *priv)
-{
-	if (!fpga_power_good(priv))
-		return false;
-
-	/* Check the config done bit */
-	return ioread32be(priv->regs + FPGA_CONFIG_STATUS) & (1 << 18);
-}
-
-/*
- * FPGA Programming Code
- */
-
-/**
- * fpga_program_block() - put a block of data into the programmer's FIFO
- * @priv: the driver's private data structure
- * @buf: the data to program
- * @count: the length of data to program (must be a multiple of 4 bytes)
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int fpga_program_block(struct fpga_dev *priv, void *buf, size_t count)
-{
-	u32 *data = buf;
-	int size = fpga_fifo_size(priv->regs);
-	int i, len;
-	unsigned long timeout;
-
-	/* enforce correct data length for the FIFO */
-	BUG_ON(count % 4 != 0);
-
-	while (count > 0) {
-
-		/* Get the size of the block to write (maximum is FIFO_SIZE) */
-		len = min_t(size_t, count, size);
-		timeout = jiffies + HZ / 4;
-
-		/* Write the block */
-		for (i = 0; i < len / 4; i++)
-			fpga_fifo_write(priv->regs, data[i]);
-
-		/* Update the amounts left */
-		count -= len;
-		data += len / 4;
-
-		/* Wait for the fifo to empty */
-		while (true) {
-
-			if (fpga_fifo_empty(priv->regs)) {
-				break;
-			} else {
-				dev_dbg(priv->dev, "Fifo not empty\n");
-				cpu_relax();
-			}
-
-			if (fpga_config_error(priv->regs)) {
-				dev_err(priv->dev, "Error detected\n");
-				return -EIO;
-			}
-
-			if (time_after(jiffies, timeout)) {
-				dev_err(priv->dev, "Fifo drain timeout\n");
-				return -ETIMEDOUT;
-			}
-
-			usleep_range(5000, 10000);
-		}
-	}
-
-	return 0;
-}
-
-/**
- * fpga_program_cpu() - program the DATA-FPGA's using the CPU
- * @priv: the driver's private data structure
- *
- * This is useful when the DMA programming method fails. It is possible to
- * wedge the Freescale DMA controller such that the DMA programming method
- * always fails. This method has always succeeded.
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static noinline int fpga_program_cpu(struct fpga_dev *priv)
-{
-	int ret;
-	unsigned long timeout;
-
-	/* Disable the programmer */
-	fpga_programmer_disable(priv);
-
-	/* Set the total byte count */
-	fpga_set_byte_count(priv->regs, priv->bytes);
-	dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes);
-
-	/* Enable the controller for programming */
-	fpga_programmer_enable(priv, false);
-	dev_dbg(priv->dev, "enabled the controller\n");
-
-	/* Write each chunk of the FPGA bitfile to FPGA programmer */
-	ret = fpga_program_block(priv, priv->vaddr, priv->bytes);
-	if (ret)
-		goto out_disable_controller;
-
-	/* Wait for the interrupt handler to signal that programming finished */
-	timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ);
-	if (!timeout) {
-		dev_err(priv->dev, "Timed out waiting for completion\n");
-		ret = -ETIMEDOUT;
-		goto out_disable_controller;
-	}
-
-	/* Retrieve the status from the interrupt handler */
-	ret = priv->status;
-
-out_disable_controller:
-	fpga_programmer_disable(priv);
-	return ret;
-}
-
-#define FIFO_DMA_ADDRESS	0xf0003000
-#define FIFO_MAX_LEN		4096
-
-/**
- * fpga_program_dma() - program the DATA-FPGA's using the DMA engine
- * @priv: the driver's private data structure
- *
- * Program the DATA-FPGA's using the Freescale DMA engine. This requires that
- * the engine is programmed such that the hardware DMA request lines can
- * control the entire DMA transaction. The system controller FPGA then
- * completely offloads the programming from the CPU.
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static noinline int fpga_program_dma(struct fpga_dev *priv)
-{
-	struct dma_chan *chan = priv->chan;
-	struct dma_async_tx_descriptor *tx;
-	size_t num_pages, len, avail = 0;
-	struct dma_slave_config config;
-	struct scatterlist *sg;
-	struct sg_table table;
-	dma_cookie_t cookie;
-	int ret, i;
-	unsigned long timeout;
-
-	/* Disable the programmer */
-	fpga_programmer_disable(priv);
-
-	/* Allocate a scatterlist for the DMA destination */
-	num_pages = DIV_ROUND_UP(priv->bytes, FIFO_MAX_LEN);
-	ret = sg_alloc_table(&table, num_pages, GFP_KERNEL);
-	if (ret) {
-		dev_err(priv->dev, "Unable to allocate dst scatterlist\n");
-		ret = -ENOMEM;
-		goto out_return;
-	}
-
-	/*
-	 * This is an ugly hack
-	 *
-	 * We fill in a scatterlist as if it were mapped for DMA. This is
-	 * necessary because there exists no better structure for this
-	 * inside the kernel code.
-	 *
-	 * As an added bonus, we can use the DMAEngine API for all of this,
-	 * rather than inventing another extremely similar API.
-	 */
-	avail = priv->bytes;
-	for_each_sg(table.sgl, sg, num_pages, i) {
-		len = min_t(size_t, avail, FIFO_MAX_LEN);
-		sg_dma_address(sg) = FIFO_DMA_ADDRESS;
-		sg_dma_len(sg) = len;
-
-		avail -= len;
-	}
-
-	/* Map the buffer for DMA */
-	ret = fpga_dma_map(priv);
-	if (ret) {
-		dev_err(priv->dev, "Unable to map buffer for DMA\n");
-		goto out_free_table;
-	}
-
-	/*
-	 * Configure the DMA channel to transfer FIFO_SIZE / 2 bytes per
-	 * transaction, and then put it under external control
-	 */
-	memset(&config, 0, sizeof(config));
-	config.direction = DMA_MEM_TO_DEV;
-	config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	config.dst_maxburst = fpga_fifo_size(priv->regs) / 2 / 4;
-	ret = dmaengine_slave_config(chan, &config);
-	if (ret) {
-		dev_err(priv->dev, "DMA slave configuration failed\n");
-		goto out_dma_unmap;
-	}
-
-	ret = fsl_dma_external_start(chan, 1);
-	if (ret) {
-		dev_err(priv->dev, "DMA external control setup failed\n");
-		goto out_dma_unmap;
-	}
-
-	/* setup and submit the DMA transaction */
-
-	tx = dmaengine_prep_dma_sg(chan, table.sgl, num_pages,
-			priv->sglist, priv->sglen, 0);
-	if (!tx) {
-		dev_err(priv->dev, "Unable to prep DMA transaction\n");
-		ret = -ENOMEM;
-		goto out_dma_unmap;
-	}
-
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		dev_err(priv->dev, "Unable to submit DMA transaction\n");
-		ret = -ENOMEM;
-		goto out_dma_unmap;
-	}
-
-	dma_async_issue_pending(chan);
-
-	/* Set the total byte count */
-	fpga_set_byte_count(priv->regs, priv->bytes);
-	dev_dbg(priv->dev, "total byte count %u bytes\n", priv->bytes);
-
-	/* Enable the controller for DMA programming */
-	fpga_programmer_enable(priv, true);
-	dev_dbg(priv->dev, "enabled the controller\n");
-
-	/* Wait for the interrupt handler to signal that programming finished */
-	timeout = wait_for_completion_timeout(&priv->completion, 2 * HZ);
-	if (!timeout) {
-		dev_err(priv->dev, "Timed out waiting for completion\n");
-		ret = -ETIMEDOUT;
-		goto out_disable_controller;
-	}
-
-	/* Retrieve the status from the interrupt handler */
-	ret = priv->status;
-
-out_disable_controller:
-	fpga_programmer_disable(priv);
-out_dma_unmap:
-	fpga_dma_unmap(priv);
-out_free_table:
-	sg_free_table(&table);
-out_return:
-	return ret;
-}
-
-/*
- * Interrupt Handling
- */
-
-static irqreturn_t fpga_irq(int irq, void *dev_id)
-{
-	struct fpga_dev *priv = dev_id;
-
-	/* Save the status */
-	priv->status = fpga_config_error(priv->regs) ? -EIO : 0;
-	dev_dbg(priv->dev, "INTERRUPT status %d\n", priv->status);
-	fpga_dump_registers(priv);
-
-	/* Disabling the programmer clears the interrupt */
-	fpga_programmer_disable(priv);
-
-	/* Notify any waiters */
-	complete(&priv->completion);
-
-	return IRQ_HANDLED;
-}
-
-/*
- * SYSFS Helpers
- */
-
-/**
- * fpga_do_stop() - deconfigure (reset) the DATA-FPGA's
- * @priv: the driver's private data structure
- *
- * LOCKING: must hold priv->lock
- */
-static int fpga_do_stop(struct fpga_dev *priv)
-{
-	u32 val;
-
-	/* Set the led to unprogrammed */
-	ledtrig_fpga_programmed(false);
-
-	/* Pulse the config line to reset the FPGA's */
-	val = CFG_CTL_ENABLE | CFG_CTL_RESET;
-	iowrite32be(val, priv->regs + FPGA_CONFIG_CONTROL);
-	iowrite32be(0x0, priv->regs + FPGA_CONFIG_CONTROL);
-
-	return 0;
-}
-
-static noinline int fpga_do_program(struct fpga_dev *priv)
-{
-	int ret;
-
-	if (priv->bytes != priv->fw_size) {
-		dev_err(priv->dev, "Incorrect bitfile size: got %zu bytes, "
-				   "should be %zu bytes\n",
-				   priv->bytes, priv->fw_size);
-		return -EINVAL;
-	}
-
-	if (!fpga_power_enabled(priv)) {
-		dev_err(priv->dev, "Power not enabled\n");
-		return -EINVAL;
-	}
-
-	if (!fpga_power_good(priv)) {
-		dev_err(priv->dev, "Power not good\n");
-		return -EINVAL;
-	}
-
-	/* Set the LED to unprogrammed */
-	ledtrig_fpga_programmed(false);
-
-	/* Try to program the FPGA's using DMA */
-	ret = fpga_program_dma(priv);
-
-	/* If DMA failed or doesn't exist, try with CPU */
-	if (ret) {
-		dev_warn(priv->dev, "Falling back to CPU programming\n");
-		ret = fpga_program_cpu(priv);
-	}
-
-	if (ret) {
-		dev_err(priv->dev, "Unable to program FPGA's\n");
-		return ret;
-	}
-
-	/* Drop the firmware bitfile from memory */
-	fpga_drop_firmware_data(priv);
-
-	dev_dbg(priv->dev, "FPGA programming successful\n");
-	ledtrig_fpga_programmed(true);
-
-	return 0;
-}
-
-/*
- * File Operations
- */
-
-static int fpga_open(struct inode *inode, struct file *filp)
-{
-	/*
-	 * The miscdevice layer puts our struct miscdevice into the
-	 * filp->private_data field. We use this to find our private
-	 * data and then overwrite it with our own private structure.
-	 */
-	struct fpga_dev *priv = container_of(filp->private_data,
-					     struct fpga_dev, miscdev);
-	unsigned int nr_pages;
-	int ret;
-
-	/* We only allow one process at a time */
-	ret = mutex_lock_interruptible(&priv->lock);
-	if (ret)
-		return ret;
-
-	filp->private_data = priv;
-	kref_get(&priv->ref);
-
-	/* Truncation: drop any existing data */
-	if (filp->f_flags & O_TRUNC)
-		priv->bytes = 0;
-
-	/* Check if we have already allocated a buffer */
-	if (priv->buf_allocated)
-		return 0;
-
-	/* Allocate a buffer to hold enough data for the bitfile */
-	nr_pages = DIV_ROUND_UP(priv->fw_size, PAGE_SIZE);
-	ret = fpga_dma_init(priv, nr_pages);
-	if (ret) {
-		dev_err(priv->dev, "unable to allocate data buffer\n");
-		mutex_unlock(&priv->lock);
-		kref_put(&priv->ref, fpga_dev_remove);
-		return ret;
-	}
-
-	priv->buf_allocated = true;
-	return 0;
-}
-
-static int fpga_release(struct inode *inode, struct file *filp)
-{
-	struct fpga_dev *priv = filp->private_data;
-
-	mutex_unlock(&priv->lock);
-	kref_put(&priv->ref, fpga_dev_remove);
-	return 0;
-}
-
-static ssize_t fpga_write(struct file *filp, const char __user *buf,
-			  size_t count, loff_t *f_pos)
-{
-	struct fpga_dev *priv = filp->private_data;
-
-	/* FPGA bitfiles have an exact size: disallow anything else */
-	if (priv->bytes >= priv->fw_size)
-		return -ENOSPC;
-
-	count = min_t(size_t, priv->fw_size - priv->bytes, count);
-	if (copy_from_user(priv->vaddr + priv->bytes, buf, count))
-		return -EFAULT;
-
-	priv->bytes += count;
-	return count;
-}
-
-static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count,
-			 loff_t *f_pos)
-{
-	struct fpga_dev *priv = filp->private_data;
-	return simple_read_from_buffer(buf, count, f_pos,
-				       priv->vaddr, priv->bytes);
-}
-
-static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin)
-{
-	struct fpga_dev *priv = filp->private_data;
-
-	/* only read-only opens are allowed to seek */
-	if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
-		return -EINVAL;
-
-	return fixed_size_llseek(filp, offset, origin, priv->fw_size);
-}
-
-static const struct file_operations fpga_fops = {
-	.open		= fpga_open,
-	.release	= fpga_release,
-	.write		= fpga_write,
-	.read		= fpga_read,
-	.llseek		= fpga_llseek,
-};
-
-/*
- * Device Attributes
- */
-
-static ssize_t pfail_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	u8 val;
-
-	val = ioread8(priv->regs + CTL_PWR_FAIL);
-	return snprintf(buf, PAGE_SIZE, "0x%.2x\n", val);
-}
-
-static ssize_t pgood_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_good(priv));
-}
-
-static ssize_t penable_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_power_enabled(priv));
-}
-
-static ssize_t penable_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	if (val) {
-		ret = fpga_enable_power_supplies(priv);
-		if (ret)
-			return ret;
-	} else {
-		fpga_do_stop(priv);
-		fpga_disable_power_supplies(priv);
-	}
-
-	return count;
-}
-
-static ssize_t program_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", fpga_running(priv));
-}
-
-static ssize_t program_store(struct device *dev, struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct fpga_dev *priv = dev_get_drvdata(dev);
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	/* We can't have an image writer and be programming simultaneously */
-	if (mutex_lock_interruptible(&priv->lock))
-		return -ERESTARTSYS;
-
-	/* Program or Reset the FPGA's */
-	ret = val ? fpga_do_program(priv) : fpga_do_stop(priv);
-	if (ret)
-		goto out_unlock;
-
-	/* Success */
-	ret = count;
-
-out_unlock:
-	mutex_unlock(&priv->lock);
-	return ret;
-}
-
-static DEVICE_ATTR(power_fail, S_IRUGO, pfail_show, NULL);
-static DEVICE_ATTR(power_good, S_IRUGO, pgood_show, NULL);
-static DEVICE_ATTR(power_enable, S_IRUGO | S_IWUSR,
-		   penable_show, penable_store);
-
-static DEVICE_ATTR(program, S_IRUGO | S_IWUSR,
-		   program_show, program_store);
-
-static struct attribute *fpga_attributes[] = {
-	&dev_attr_power_fail.attr,
-	&dev_attr_power_good.attr,
-	&dev_attr_power_enable.attr,
-	&dev_attr_program.attr,
-	NULL,
-};
-
-static const struct attribute_group fpga_attr_group = {
-	.attrs = fpga_attributes,
-};
-
-/*
- * OpenFirmware Device Subsystem
- */
-
-#define SYS_REG_VERSION		0x00
-#define SYS_REG_GEOGRAPHIC	0x10
-
-static bool dma_filter(struct dma_chan *chan, void *data)
-{
-	/*
-	 * DMA Channel #0 is the only acceptable device
-	 *
-	 * This probably won't survive an unload/load cycle of the Freescale
-	 * DMAEngine driver, but that won't be a problem
-	 */
-	return chan->chan_id == 0 && chan->device->dev_id == 0;
-}
-
-static int fpga_of_remove(struct platform_device *op)
-{
-	struct fpga_dev *priv = platform_get_drvdata(op);
-	struct device *this_device = priv->miscdev.this_device;
-
-	sysfs_remove_group(&this_device->kobj, &fpga_attr_group);
-	misc_deregister(&priv->miscdev);
-
-	free_irq(priv->irq, priv);
-	irq_dispose_mapping(priv->irq);
-
-	/* make sure the power supplies are off */
-	fpga_disable_power_supplies(priv);
-
-	/* unmap registers */
-	iounmap(priv->immr);
-	iounmap(priv->regs);
-
-	dma_release_channel(priv->chan);
-
-	/* drop our reference to the private data structure */
-	kref_put(&priv->ref, fpga_dev_remove);
-	return 0;
-}
-
-/* CTL-CPLD Version Register */
-#define CTL_CPLD_VERSION	0x2000
-
-static int fpga_of_probe(struct platform_device *op)
-{
-	struct device_node *of_node = op->dev.of_node;
-	struct device *this_device;
-	struct fpga_dev *priv;
-	dma_cap_mask_t mask;
-	u32 ver;
-	int ret;
-
-	/* Allocate private data */
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&op->dev, "Unable to allocate private data\n");
-		ret = -ENOMEM;
-		goto out_return;
-	}
-
-	/* Setup the miscdevice */
-	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
-	priv->miscdev.name = drv_name;
-	priv->miscdev.fops = &fpga_fops;
-
-	kref_init(&priv->ref);
-
-	platform_set_drvdata(op, priv);
-	priv->dev = &op->dev;
-	mutex_init(&priv->lock);
-	init_completion(&priv->completion);
-
-	dev_set_drvdata(priv->dev, priv);
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
-	dma_cap_set(DMA_SLAVE, mask);
-	dma_cap_set(DMA_SG, mask);
-
-	/* Get control of DMA channel #0 */
-	priv->chan = dma_request_channel(mask, dma_filter, NULL);
-	if (!priv->chan) {
-		dev_err(&op->dev, "Unable to acquire DMA channel #0\n");
-		ret = -ENODEV;
-		goto out_free_priv;
-	}
-
-	/* Remap the registers for use */
-	priv->regs = of_iomap(of_node, 0);
-	if (!priv->regs) {
-		dev_err(&op->dev, "Unable to ioremap registers\n");
-		ret = -ENOMEM;
-		goto out_dma_release_channel;
-	}
-
-	/* Remap the IMMR for use */
-	priv->immr = ioremap(get_immrbase(), 0x100000);
-	if (!priv->immr) {
-		dev_err(&op->dev, "Unable to ioremap IMMR\n");
-		ret = -ENOMEM;
-		goto out_unmap_regs;
-	}
-
-	/*
-	 * Check that external DMA is configured
-	 *
-	 * U-Boot does this for us, but we should check it and bail out if
-	 * there is a problem. Failing to have this register setup correctly
-	 * will cause the DMA controller to transfer a single cacheline
-	 * worth of data, then wedge itself.
-	 */
-	if ((ioread32be(priv->immr + 0x114) & 0xE00) != 0xE00) {
-		dev_err(&op->dev, "External DMA control not configured\n");
-		ret = -ENODEV;
-		goto out_unmap_immr;
-	}
-
-	/*
-	 * Check the CTL-CPLD version
-	 *
-	 * This driver uses the CTL-CPLD DATA-FPGA power sequencer, and we
-	 * don't want to run on any version of the CTL-CPLD that does not use
-	 * a compatible register layout.
-	 *
-	 * v2: changed register layout, added power sequencer
-	 * v3: added glitch filter on the i2c overcurrent/overtemp outputs
-	 */
-	ver = ioread8(priv->regs + CTL_CPLD_VERSION);
-	if (ver != 0x02 && ver != 0x03) {
-		dev_err(&op->dev, "CTL-CPLD is not version 0x02 or 0x03!\n");
-		ret = -ENODEV;
-		goto out_unmap_immr;
-	}
-
-	/* Set the exact size that the firmware image should be */
-	ver = ioread32be(priv->regs + SYS_REG_VERSION);
-	priv->fw_size = (ver & (1 << 18)) ? FW_SIZE_EP2S130 : FW_SIZE_EP2S90;
-
-	/* Find the correct IRQ number */
-	priv->irq = irq_of_parse_and_map(of_node, 0);
-	if (priv->irq == NO_IRQ) {
-		dev_err(&op->dev, "Unable to find IRQ line\n");
-		ret = -ENODEV;
-		goto out_unmap_immr;
-	}
-
-	/* Request the IRQ */
-	ret = request_irq(priv->irq, fpga_irq, IRQF_SHARED, drv_name, priv);
-	if (ret) {
-		dev_err(&op->dev, "Unable to request IRQ %d\n", priv->irq);
-		ret = -ENODEV;
-		goto out_irq_dispose_mapping;
-	}
-
-	/* Reset and stop the FPGA's, just in case */
-	fpga_do_stop(priv);
-
-	/* Register the miscdevice */
-	ret = misc_register(&priv->miscdev);
-	if (ret) {
-		dev_err(&op->dev, "Unable to register miscdevice\n");
-		goto out_free_irq;
-	}
-
-	/* Create the sysfs files */
-	this_device = priv->miscdev.this_device;
-	dev_set_drvdata(this_device, priv);
-	ret = sysfs_create_group(&this_device->kobj, &fpga_attr_group);
-	if (ret) {
-		dev_err(&op->dev, "Unable to create sysfs files\n");
-		goto out_misc_deregister;
-	}
-
-	dev_info(priv->dev, "CARMA FPGA Programmer: %s rev%s with %s FPGAs\n",
-			(ver & (1 << 17)) ? "Correlator" : "Digitizer",
-			(ver & (1 << 16)) ? "B" : "A",
-			(ver & (1 << 18)) ? "EP2S130" : "EP2S90");
-
-	return 0;
-
-out_misc_deregister:
-	misc_deregister(&priv->miscdev);
-out_free_irq:
-	free_irq(priv->irq, priv);
-out_irq_dispose_mapping:
-	irq_dispose_mapping(priv->irq);
-out_unmap_immr:
-	iounmap(priv->immr);
-out_unmap_regs:
-	iounmap(priv->regs);
-out_dma_release_channel:
-	dma_release_channel(priv->chan);
-out_free_priv:
-	kref_put(&priv->ref, fpga_dev_remove);
-out_return:
-	return ret;
-}
-
-static const struct of_device_id fpga_of_match[] = {
-	{ .compatible = "carma,fpga-programmer", },
-	{},
-};
-
-static struct platform_driver fpga_of_driver = {
-	.probe		= fpga_of_probe,
-	.remove		= fpga_of_remove,
-	.driver		= {
-		.name		= drv_name,
-		.of_match_table	= fpga_of_match,
-	},
-};
-
-/*
- * Module Init / Exit
- */
-
-static int __init fpga_init(void)
-{
-	led_trigger_register_simple("fpga", &ledtrig_fpga);
-	return platform_driver_register(&fpga_of_driver);
-}
-
-static void __exit fpga_exit(void)
-{
-	platform_driver_unregister(&fpga_of_driver);
-	led_trigger_unregister_simple(ledtrig_fpga);
-}
-
-MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
-MODULE_DESCRIPTION("CARMA Board DATA-FPGA Programmer");
-MODULE_LICENSE("GPL");
-
-module_init(fpga_init);
-module_exit(fpga_exit);
diff --git a/kernel/drivers/misc/carma/carma-fpga.c b/kernel/drivers/misc/carma/carma-fpga.c
deleted file mode 100644
index 5aba3fd78..000000000
--- a/kernel/drivers/misc/carma/carma-fpga.c
+++ /dev/null
@@ -1,1507 +0,0 @@
-/*
- * CARMA DATA-FPGA Access Driver
- *
- * Copyright (c) 2009-2011 Ira W. Snyder <iws@ovro.caltech.edu>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/*
- * FPGA Memory Dump Format
- *
- * FPGA #0 control registers (32 x 32-bit words)
- * FPGA #1 control registers (32 x 32-bit words)
- * FPGA #2 control registers (32 x 32-bit words)
- * FPGA #3 control registers (32 x 32-bit words)
- * SYSFPGA control registers (32 x 32-bit words)
- * FPGA #0 correlation array (NUM_CORL0 correlation blocks)
- * FPGA #1 correlation array (NUM_CORL1 correlation blocks)
- * FPGA #2 correlation array (NUM_CORL2 correlation blocks)
- * FPGA #3 correlation array (NUM_CORL3 correlation blocks)
- *
- * Each correlation array consists of:
- *
- * Correlation Data      (2 x NUM_LAGSn x 32-bit words)
- * Pipeline Metadata     (2 x NUM_METAn x 32-bit words)
- * Quantization Counters (2 x NUM_QCNTn x 32-bit words)
- *
- * The NUM_CORLn, NUM_LAGSn, NUM_METAn, and NUM_QCNTn values come from
- * the FPGA configuration registers. They do not change once the FPGA's
- * have been programmed, they only change on re-programming.
- */
-
-/*
- * Basic Description:
- *
- * This driver is used to capture correlation spectra off of the four data
- * processing FPGAs. The FPGAs are often reprogrammed at runtime, therefore
- * this driver supports dynamic enable/disable of capture while the device
- * remains open.
- *
- * The nominal capture rate is 64Hz (every 15.625ms). To facilitate this fast
- * capture rate, all buffers are pre-allocated to avoid any potentially long
- * running memory allocations while capturing.
- *
- * There are two lists and one pointer which are used to keep track of the
- * different states of data buffers.
- *
- * 1) free list
- * This list holds all empty data buffers which are ready to receive data.
- *
- * 2) inflight pointer
- * This pointer holds the currently inflight data buffer. This buffer is having
- * data copied into it by the DMA engine.
- *
- * 3) used list
- * This list holds data buffers which have been filled, and are waiting to be
- * read by userspace.
- *
- * All buffers start life on the free list, then move successively to the
- * inflight pointer, and then to the used list. After they have been read by
- * userspace, they are moved back to the free list. The cycle repeats as long
- * as necessary.
- *
- * It should be noted that all buffers are mapped and ready for DMA when they
- * are on any of the three lists. They are only unmapped when they are in the
- * process of being read by userspace.
- */
-
-/*
- * Notes on the IRQ masking scheme:
- *
- * The IRQ masking scheme here is different than most other hardware. The only
- * way for the DATA-FPGAs to detect if the kernel has taken too long to copy
- * the data is if the status registers are not cleared before the next
- * correlation data dump is ready.
- *
- * The interrupt line is connected to the status registers, such that when they
- * are cleared, the interrupt is de-asserted. Therein lies our problem. We need
- * to schedule a long-running DMA operation and return from the interrupt
- * handler quickly, but we cannot clear the status registers.
- *
- * To handle this, the system controller FPGA has the capability to connect the
- * interrupt line to a user-controlled GPIO pin. This pin is driven high
- * (unasserted) and left that way. To mask the interrupt, we change the
- * interrupt source to the GPIO pin. Tada, we hid the interrupt. :)
- */
-
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#include <linux/dma-mapping.h>
-#include <linux/miscdevice.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/seq_file.h>
-#include <linux/highmem.h>
-#include <linux/debugfs.h>
-#include <linux/vmalloc.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/kref.h>
-#include <linux/io.h>
-
-/* system controller registers */
-#define SYS_IRQ_SOURCE_CTL	0x24
-#define SYS_IRQ_OUTPUT_EN	0x28
-#define SYS_IRQ_OUTPUT_DATA	0x2C
-#define SYS_IRQ_INPUT_DATA	0x30
-#define SYS_FPGA_CONFIG_STATUS	0x44
-
-/* GPIO IRQ line assignment */
-#define IRQ_CORL_DONE		0x10
-
-/* FPGA registers */
-#define MMAP_REG_VERSION	0x00
-#define MMAP_REG_CORL_CONF1	0x08
-#define MMAP_REG_CORL_CONF2	0x0C
-#define MMAP_REG_STATUS		0x48
-
-#define SYS_FPGA_BLOCK		0xF0000000
-
-#define DATA_FPGA_START		0x400000
-#define DATA_FPGA_SIZE		0x80000
-
-static const char drv_name[] = "carma-fpga";
-
-#define NUM_FPGA	4
-
-#define MIN_DATA_BUFS	8
-#define MAX_DATA_BUFS	64
-
-struct fpga_info {
-	unsigned int num_lag_ram;
-	unsigned int blk_size;
-};
-
-struct data_buf {
-	struct list_head entry;
-	void *vaddr;
-	struct scatterlist *sglist;
-	int sglen;
-	int nr_pages;
-	size_t size;
-};
-
-struct fpga_device {
-	/* character device */
-	struct miscdevice miscdev;
-	struct device *dev;
-	struct mutex mutex;
-
-	/* reference count */
-	struct kref ref;
-
-	/* FPGA registers and information */
-	struct fpga_info info[NUM_FPGA];
-	void __iomem *regs;
-	int irq;
-
-	/* FPGA Physical Address/Size Information */
-	resource_size_t phys_addr;
-	size_t phys_size;
-
-	/* DMA structures */
-	struct sg_table corl_table;
-	unsigned int corl_nents;
-	struct dma_chan *chan;
-
-	/* Protection for all members below */
-	spinlock_t lock;
-
-	/* Device enable/disable flag */
-	bool enabled;
-
-	/* Correlation data buffers */
-	wait_queue_head_t wait;
-	struct list_head free;
-	struct list_head used;
-	struct data_buf *inflight;
-
-	/* Information about data buffers */
-	unsigned int num_dropped;
-	unsigned int num_buffers;
-	size_t bufsize;
-	struct dentry *dbg_entry;
-};
-
-struct fpga_reader {
-	struct fpga_device *priv;
-	struct data_buf *buf;
-	off_t buf_start;
-};
-
-static void fpga_device_release(struct kref *ref)
-{
-	struct fpga_device *priv = container_of(ref, struct fpga_device, ref);
-
-	/* the last reader has exited, cleanup the last bits */
-	mutex_destroy(&priv->mutex);
-	kfree(priv);
-}
-
-/*
- * Data Buffer Allocation Helpers
- */
-
-static int carma_dma_init(struct data_buf *buf, int nr_pages)
-{
-	struct page *pg;
-	int i;
-
-	buf->vaddr = vmalloc_32(nr_pages << PAGE_SHIFT);
-	if (NULL == buf->vaddr) {
-		pr_debug("vmalloc_32(%d pages) failed\n", nr_pages);
-		return -ENOMEM;
-	}
-
-	pr_debug("vmalloc is at addr 0x%08lx, size=%d\n",
-				(unsigned long)buf->vaddr,
-				nr_pages << PAGE_SHIFT);
-
-	memset(buf->vaddr, 0, nr_pages << PAGE_SHIFT);
-	buf->nr_pages = nr_pages;
-
-	buf->sglist = vzalloc(buf->nr_pages * sizeof(*buf->sglist));
-	if (NULL == buf->sglist)
-		goto vzalloc_err;
-
-	sg_init_table(buf->sglist, buf->nr_pages);
-	for (i = 0; i < buf->nr_pages; i++) {
-		pg = vmalloc_to_page(buf->vaddr + i * PAGE_SIZE);
-		if (NULL == pg)
-			goto vmalloc_to_page_err;
-		sg_set_page(&buf->sglist[i], pg, PAGE_SIZE, 0);
-	}
-	return 0;
-
-vmalloc_to_page_err:
-	vfree(buf->sglist);
-	buf->sglist = NULL;
-vzalloc_err:
-	vfree(buf->vaddr);
-	buf->vaddr = NULL;
-	return -ENOMEM;
-}
-
-static int carma_dma_map(struct device *dev, struct data_buf *buf)
-{
-	buf->sglen = dma_map_sg(dev, buf->sglist,
-			buf->nr_pages, DMA_FROM_DEVICE);
-
-	if (0 == buf->sglen) {
-		pr_warn("%s: dma_map_sg failed\n", __func__);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-static int carma_dma_unmap(struct device *dev, struct data_buf *buf)
-{
-	if (!buf->sglen)
-		return 0;
-
-	dma_unmap_sg(dev, buf->sglist, buf->sglen, DMA_FROM_DEVICE);
-	buf->sglen = 0;
-	return 0;
-}
-
-/**
- * data_free_buffer() - free a single data buffer and all allocated memory
- * @buf: the buffer to free
- *
- * This will free all of the pages allocated to the given data buffer, and
- * then free the structure itself
- */
-static void data_free_buffer(struct data_buf *buf)
-{
-	/* It is ok to free a NULL buffer */
-	if (!buf)
-		return;
-
-	/* free all memory */
-	vfree(buf->sglist);
-	vfree(buf->vaddr);
-	kfree(buf);
-}
-
-/**
- * data_alloc_buffer() - allocate and fill a data buffer with pages
- * @bytes: the number of bytes required
- *
- * This allocates all space needed for a data buffer. It must be mapped before
- * use in a DMA transaction using carma_dma_map().
- *
- * Returns NULL on failure
- */
-static struct data_buf *data_alloc_buffer(const size_t bytes)
-{
-	unsigned int nr_pages;
-	struct data_buf *buf;
-	int ret;
-
-	/* calculate the number of pages necessary */
-	nr_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
-
-	/* allocate the buffer structure */
-	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
-	if (!buf)
-		goto out_return;
-
-	/* initialize internal fields */
-	INIT_LIST_HEAD(&buf->entry);
-	buf->size = bytes;
-
-	/* allocate the buffer */
-	ret = carma_dma_init(buf, nr_pages);
-	if (ret)
-		goto out_free_buf;
-
-	return buf;
-
-out_free_buf:
-	kfree(buf);
-out_return:
-	return NULL;
-}
-
-/**
- * data_free_buffers() - free all allocated buffers
- * @priv: the driver's private data structure
- *
- * Free all buffers allocated by the driver (except those currently in the
- * process of being read by userspace).
- *
- * LOCKING: must hold dev->mutex
- * CONTEXT: user
- */
-static void data_free_buffers(struct fpga_device *priv)
-{
-	struct data_buf *buf, *tmp;
-
-	/* the device should be stopped, no DMA in progress */
-	BUG_ON(priv->inflight != NULL);
-
-	list_for_each_entry_safe(buf, tmp, &priv->free, entry) {
-		list_del_init(&buf->entry);
-		carma_dma_unmap(priv->dev, buf);
-		data_free_buffer(buf);
-	}
-
-	list_for_each_entry_safe(buf, tmp, &priv->used, entry) {
-		list_del_init(&buf->entry);
-		carma_dma_unmap(priv->dev, buf);
-		data_free_buffer(buf);
-	}
-
-	priv->num_buffers = 0;
-	priv->bufsize = 0;
-}
-
-/**
- * data_alloc_buffers() - allocate 1 seconds worth of data buffers
- * @priv: the driver's private data structure
- *
- * Allocate enough buffers for a whole second worth of data
- *
- * This routine will attempt to degrade nicely by succeeding even if a full
- * second worth of data buffers could not be allocated, as long as a minimum
- * number were allocated. In this case, it will print a message to the kernel
- * log.
- *
- * The device must not be modifying any lists when this is called.
- *
- * CONTEXT: user
- * LOCKING: must hold dev->mutex
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_alloc_buffers(struct fpga_device *priv)
-{
-	struct data_buf *buf;
-	int i, ret;
-
-	for (i = 0; i < MAX_DATA_BUFS; i++) {
-
-		/* allocate a buffer */
-		buf = data_alloc_buffer(priv->bufsize);
-		if (!buf)
-			break;
-
-		/* map it for DMA */
-		ret = carma_dma_map(priv->dev, buf);
-		if (ret) {
-			data_free_buffer(buf);
-			break;
-		}
-
-		/* add it to the list of free buffers */
-		list_add_tail(&buf->entry, &priv->free);
-		priv->num_buffers++;
-	}
-
-	/* Make sure we allocated the minimum required number of buffers */
-	if (priv->num_buffers < MIN_DATA_BUFS) {
-		dev_err(priv->dev, "Unable to allocate enough data buffers\n");
-		data_free_buffers(priv);
-		return -ENOMEM;
-	}
-
-	/* Warn if we are running in a degraded state, but do not fail */
-	if (priv->num_buffers < MAX_DATA_BUFS) {
-		dev_warn(priv->dev,
-			 "Unable to allocate %d buffers, using %d buffers instead\n",
-			 MAX_DATA_BUFS, i);
-	}
-
-	return 0;
-}
-
-/*
- * DMA Operations Helpers
- */
-
-/**
- * fpga_start_addr() - get the physical address a DATA-FPGA
- * @priv: the driver's private data structure
- * @fpga: the DATA-FPGA number (zero based)
- */
-static dma_addr_t fpga_start_addr(struct fpga_device *priv, unsigned int fpga)
-{
-	return priv->phys_addr + 0x400000 + (0x80000 * fpga);
-}
-
-/**
- * fpga_block_addr() - get the physical address of a correlation data block
- * @priv: the driver's private data structure
- * @fpga: the DATA-FPGA number (zero based)
- * @blknum: the correlation block number (zero based)
- */
-static dma_addr_t fpga_block_addr(struct fpga_device *priv, unsigned int fpga,
-				  unsigned int blknum)
-{
-	return fpga_start_addr(priv, fpga) + (0x10000 * (1 + blknum));
-}
-
-#define REG_BLOCK_SIZE	(32 * 4)
-
-/**
- * data_setup_corl_table() - create the scatterlist for correlation dumps
- * @priv: the driver's private data structure
- *
- * Create the scatterlist for transferring a correlation dump from the
- * DATA FPGAs. This structure will be reused for each buffer than needs
- * to be filled with correlation data.
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_setup_corl_table(struct fpga_device *priv)
-{
-	struct sg_table *table = &priv->corl_table;
-	struct scatterlist *sg;
-	struct fpga_info *info;
-	int i, j, ret;
-
-	/* Calculate the number of entries needed */
-	priv->corl_nents = (1 + NUM_FPGA) * REG_BLOCK_SIZE;
-	for (i = 0; i < NUM_FPGA; i++)
-		priv->corl_nents += priv->info[i].num_lag_ram;
-
-	/* Allocate the scatterlist table */
-	ret = sg_alloc_table(table, priv->corl_nents, GFP_KERNEL);
-	if (ret) {
-		dev_err(priv->dev, "unable to allocate DMA table\n");
-		return ret;
-	}
-
-	/* Add the DATA FPGA registers to the scatterlist */
-	sg = table->sgl;
-	for (i = 0; i < NUM_FPGA; i++) {
-		sg_dma_address(sg) = fpga_start_addr(priv, i);
-		sg_dma_len(sg) = REG_BLOCK_SIZE;
-		sg = sg_next(sg);
-	}
-
-	/* Add the SYS-FPGA registers to the scatterlist */
-	sg_dma_address(sg) = SYS_FPGA_BLOCK;
-	sg_dma_len(sg) = REG_BLOCK_SIZE;
-	sg = sg_next(sg);
-
-	/* Add the FPGA correlation data blocks to the scatterlist */
-	for (i = 0; i < NUM_FPGA; i++) {
-		info = &priv->info[i];
-		for (j = 0; j < info->num_lag_ram; j++) {
-			sg_dma_address(sg) = fpga_block_addr(priv, i, j);
-			sg_dma_len(sg) = info->blk_size;
-			sg = sg_next(sg);
-		}
-	}
-
-	/*
-	 * All physical addresses and lengths are present in the structure
-	 * now. It can be reused for every FPGA DATA interrupt
-	 */
-	return 0;
-}
-
-/*
- * FPGA Register Access Helpers
- */
-
-static void fpga_write_reg(struct fpga_device *priv, unsigned int fpga,
-			   unsigned int reg, u32 val)
-{
-	const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE);
-	iowrite32be(val, priv->regs + fpga_start + reg);
-}
-
-static u32 fpga_read_reg(struct fpga_device *priv, unsigned int fpga,
-			 unsigned int reg)
-{
-	const int fpga_start = DATA_FPGA_START + (fpga * DATA_FPGA_SIZE);
-	return ioread32be(priv->regs + fpga_start + reg);
-}
-
-/**
- * data_calculate_bufsize() - calculate the data buffer size required
- * @priv: the driver's private data structure
- *
- * Calculate the total buffer size needed to hold a single block
- * of correlation data
- *
- * CONTEXT: user
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_calculate_bufsize(struct fpga_device *priv)
-{
-	u32 num_corl, num_lags, num_meta, num_qcnt, num_pack;
-	u32 conf1, conf2, version;
-	u32 num_lag_ram, blk_size;
-	int i;
-
-	/* Each buffer starts with the 5 FPGA register areas */
-	priv->bufsize = (1 + NUM_FPGA) * REG_BLOCK_SIZE;
-
-	/* Read and store the configuration data for each FPGA */
-	for (i = 0; i < NUM_FPGA; i++) {
-		version = fpga_read_reg(priv, i, MMAP_REG_VERSION);
-		conf1 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF1);
-		conf2 = fpga_read_reg(priv, i, MMAP_REG_CORL_CONF2);
-
-		/* minor version 2 and later */
-		if ((version & 0x000000FF) >= 2) {
-			num_corl = (conf1 & 0x000000F0) >> 4;
-			num_pack = (conf1 & 0x00000F00) >> 8;
-			num_lags = (conf1 & 0x00FFF000) >> 12;
-			num_meta = (conf1 & 0x7F000000) >> 24;
-			num_qcnt = (conf2 & 0x00000FFF) >> 0;
-		} else {
-			num_corl = (conf1 & 0x000000F0) >> 4;
-			num_pack = 1; /* implied */
-			num_lags = (conf1 & 0x000FFF00) >> 8;
-			num_meta = (conf1 & 0x7FF00000) >> 20;
-			num_qcnt = (conf2 & 0x00000FFF) >> 0;
-		}
-
-		num_lag_ram = (num_corl + num_pack - 1) / num_pack;
-		blk_size = ((num_pack * num_lags) + num_meta + num_qcnt) * 8;
-
-		priv->info[i].num_lag_ram = num_lag_ram;
-		priv->info[i].blk_size = blk_size;
-		priv->bufsize += num_lag_ram * blk_size;
-
-		dev_dbg(priv->dev, "FPGA %d NUM_CORL: %d\n", i, num_corl);
-		dev_dbg(priv->dev, "FPGA %d NUM_PACK: %d\n", i, num_pack);
-		dev_dbg(priv->dev, "FPGA %d NUM_LAGS: %d\n", i, num_lags);
-		dev_dbg(priv->dev, "FPGA %d NUM_META: %d\n", i, num_meta);
-		dev_dbg(priv->dev, "FPGA %d NUM_QCNT: %d\n", i, num_qcnt);
-		dev_dbg(priv->dev, "FPGA %d BLK_SIZE: %d\n", i, blk_size);
-	}
-
-	dev_dbg(priv->dev, "TOTAL BUFFER SIZE: %zu bytes\n", priv->bufsize);
-	return 0;
-}
-
-/*
- * Interrupt Handling
- */
-
-/**
- * data_disable_interrupts() - stop the device from generating interrupts
- * @priv: the driver's private data structure
- *
- * Hide interrupts by switching to GPIO interrupt source
- *
- * LOCKING: must hold dev->lock
- */
-static void data_disable_interrupts(struct fpga_device *priv)
-{
-	/* hide the interrupt by switching the IRQ driver to GPIO */
-	iowrite32be(0x2F, priv->regs + SYS_IRQ_SOURCE_CTL);
-}
-
-/**
- * data_enable_interrupts() - allow the device to generate interrupts
- * @priv: the driver's private data structure
- *
- * Unhide interrupts by switching to the FPGA interrupt source. At the
- * same time, clear the DATA-FPGA status registers.
- *
- * LOCKING: must hold dev->lock
- */
-static void data_enable_interrupts(struct fpga_device *priv)
-{
-	/* clear the actual FPGA corl_done interrupt */
-	fpga_write_reg(priv, 0, MMAP_REG_STATUS, 0x0);
-	fpga_write_reg(priv, 1, MMAP_REG_STATUS, 0x0);
-	fpga_write_reg(priv, 2, MMAP_REG_STATUS, 0x0);
-	fpga_write_reg(priv, 3, MMAP_REG_STATUS, 0x0);
-
-	/* flush the writes */
-	fpga_read_reg(priv, 0, MMAP_REG_STATUS);
-	fpga_read_reg(priv, 1, MMAP_REG_STATUS);
-	fpga_read_reg(priv, 2, MMAP_REG_STATUS);
-	fpga_read_reg(priv, 3, MMAP_REG_STATUS);
-
-	/* switch back to the external interrupt source */
-	iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL);
-}
-
-/**
- * data_dma_cb() - DMAEngine callback for DMA completion
- * @data: the driver's private data structure
- *
- * Complete a DMA transfer from the DATA-FPGA's
- *
- * This is called via the DMA callback mechanism, and will handle moving the
- * completed DMA transaction to the used list, and then wake any processes
- * waiting for new data
- *
- * CONTEXT: any, softirq expected
- */
-static void data_dma_cb(void *data)
-{
-	struct fpga_device *priv = data;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
-
-	/* If there is no inflight buffer, we've got a bug */
-	BUG_ON(priv->inflight == NULL);
-
-	/* Move the inflight buffer onto the used list */
-	list_move_tail(&priv->inflight->entry, &priv->used);
-	priv->inflight = NULL;
-
-	/*
-	 * If data dumping is still enabled, then clear the FPGA
-	 * status registers and re-enable FPGA interrupts
-	 */
-	if (priv->enabled)
-		data_enable_interrupts(priv);
-
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	/*
-	 * We've changed both the inflight and used lists, so we need
-	 * to wake up any processes that are blocking for those events
-	 */
-	wake_up(&priv->wait);
-}
-
-/**
- * data_submit_dma() - prepare and submit the required DMA to fill a buffer
- * @priv: the driver's private data structure
- * @buf: the data buffer
- *
- * Prepare and submit the necessary DMA transactions to fill a correlation
- * data buffer.
- *
- * LOCKING: must hold dev->lock
- * CONTEXT: hardirq only
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
-{
-	struct scatterlist *dst_sg, *src_sg;
-	unsigned int dst_nents, src_nents;
-	struct dma_chan *chan = priv->chan;
-	struct dma_async_tx_descriptor *tx;
-	dma_cookie_t cookie;
-	dma_addr_t dst, src;
-	unsigned long dma_flags = 0;
-
-	dst_sg = buf->sglist;
-	dst_nents = buf->sglen;
-
-	src_sg = priv->corl_table.sgl;
-	src_nents = priv->corl_nents;
-
-	/*
-	 * All buffers passed to this function should be ready and mapped
-	 * for DMA already. Therefore, we don't need to do anything except
-	 * submit it to the Freescale DMA Engine for processing
-	 */
-
-	/* setup the scatterlist to scatterlist transfer */
-	tx = chan->device->device_prep_dma_sg(chan,
-					      dst_sg, dst_nents,
-					      src_sg, src_nents,
-					      0);
-	if (!tx) {
-		dev_err(priv->dev, "unable to prep scatterlist DMA\n");
-		return -ENOMEM;
-	}
-
-	/* submit the transaction to the DMA controller */
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		dev_err(priv->dev, "unable to submit scatterlist DMA\n");
-		return -ENOMEM;
-	}
-
-	/* Prepare the re-read of the SYS-FPGA block */
-	dst = sg_dma_address(dst_sg) + (NUM_FPGA * REG_BLOCK_SIZE);
-	src = SYS_FPGA_BLOCK;
-	tx = chan->device->device_prep_dma_memcpy(chan, dst, src,
-						  REG_BLOCK_SIZE,
-						  dma_flags);
-	if (!tx) {
-		dev_err(priv->dev, "unable to prep SYS-FPGA DMA\n");
-		return -ENOMEM;
-	}
-
-	/* Setup the callback */
-	tx->callback = data_dma_cb;
-	tx->callback_param = priv;
-
-	/* submit the transaction to the DMA controller */
-	cookie = tx->tx_submit(tx);
-	if (dma_submit_error(cookie)) {
-		dev_err(priv->dev, "unable to submit SYS-FPGA DMA\n");
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-#define CORL_DONE	0x1
-#define CORL_ERR	0x2
-
-static irqreturn_t data_irq(int irq, void *dev_id)
-{
-	struct fpga_device *priv = dev_id;
-	bool submitted = false;
-	struct data_buf *buf;
-	u32 status;
-	int i;
-
-	/* detect spurious interrupts via FPGA status */
-	for (i = 0; i < 4; i++) {
-		status = fpga_read_reg(priv, i, MMAP_REG_STATUS);
-		if (!(status & (CORL_DONE | CORL_ERR))) {
-			dev_err(priv->dev, "spurious irq detected (FPGA)\n");
-			return IRQ_NONE;
-		}
-	}
-
-	/* detect spurious interrupts via raw IRQ pin readback */
-	status = ioread32be(priv->regs + SYS_IRQ_INPUT_DATA);
-	if (status & IRQ_CORL_DONE) {
-		dev_err(priv->dev, "spurious irq detected (IRQ)\n");
-		return IRQ_NONE;
-	}
-
-	spin_lock(&priv->lock);
-
-	/*
-	 * This is an error case that should never happen.
-	 *
-	 * If this driver has a bug and manages to re-enable interrupts while
-	 * a DMA is in progress, then we will hit this statement and should
-	 * start paying attention immediately.
-	 */
-	BUG_ON(priv->inflight != NULL);
-
-	/* hide the interrupt by switching the IRQ driver to GPIO */
-	data_disable_interrupts(priv);
-
-	/* If there are no free buffers, drop this data */
-	if (list_empty(&priv->free)) {
-		priv->num_dropped++;
-		goto out;
-	}
-
-	buf = list_first_entry(&priv->free, struct data_buf, entry);
-	list_del_init(&buf->entry);
-	BUG_ON(buf->size != priv->bufsize);
-
-	/* Submit a DMA transfer to get the correlation data */
-	if (data_submit_dma(priv, buf)) {
-		dev_err(priv->dev, "Unable to setup DMA transfer\n");
-		list_move_tail(&buf->entry, &priv->free);
-		goto out;
-	}
-
-	/* Save the buffer for the DMA callback */
-	priv->inflight = buf;
-	submitted = true;
-
-	/* Start the DMA Engine */
-	dma_async_issue_pending(priv->chan);
-
-out:
-	/* If no DMA was submitted, re-enable interrupts */
-	if (!submitted)
-		data_enable_interrupts(priv);
-
-	spin_unlock(&priv->lock);
-	return IRQ_HANDLED;
-}
-
-/*
- * Realtime Device Enable Helpers
- */
-
-/**
- * data_device_enable() - enable the device for buffered dumping
- * @priv: the driver's private data structure
- *
- * Enable the device for buffered dumping. Allocates buffers and hooks up
- * the interrupt handler. When this finishes, data will come pouring in.
- *
- * LOCKING: must hold dev->mutex
- * CONTEXT: user context only
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_device_enable(struct fpga_device *priv)
-{
-	bool enabled;
-	u32 val;
-	int ret;
-
-	/* multiple enables are safe: they do nothing */
-	spin_lock_irq(&priv->lock);
-	enabled = priv->enabled;
-	spin_unlock_irq(&priv->lock);
-	if (enabled)
-		return 0;
-
-	/* check that the FPGAs are programmed */
-	val = ioread32be(priv->regs + SYS_FPGA_CONFIG_STATUS);
-	if (!(val & (1 << 18))) {
-		dev_err(priv->dev, "DATA-FPGAs are not enabled\n");
-		return -ENODATA;
-	}
-
-	/* read the FPGAs to calculate the buffer size */
-	ret = data_calculate_bufsize(priv);
-	if (ret) {
-		dev_err(priv->dev, "unable to calculate buffer size\n");
-		goto out_error;
-	}
-
-	/* allocate the correlation data buffers */
-	ret = data_alloc_buffers(priv);
-	if (ret) {
-		dev_err(priv->dev, "unable to allocate buffers\n");
-		goto out_error;
-	}
-
-	/* setup the source scatterlist for dumping correlation data */
-	ret = data_setup_corl_table(priv);
-	if (ret) {
-		dev_err(priv->dev, "unable to setup correlation DMA table\n");
-		goto out_error;
-	}
-
-	/* prevent the FPGAs from generating interrupts */
-	data_disable_interrupts(priv);
-
-	/* hookup the irq handler */
-	ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv);
-	if (ret) {
-		dev_err(priv->dev, "unable to request IRQ handler\n");
-		goto out_error;
-	}
-
-	/* allow the DMA callback to re-enable FPGA interrupts */
-	spin_lock_irq(&priv->lock);
-	priv->enabled = true;
-	spin_unlock_irq(&priv->lock);
-
-	/* allow the FPGAs to generate interrupts */
-	data_enable_interrupts(priv);
-	return 0;
-
-out_error:
-	sg_free_table(&priv->corl_table);
-	priv->corl_nents = 0;
-
-	data_free_buffers(priv);
-	return ret;
-}
-
-/**
- * data_device_disable() - disable the device for buffered dumping
- * @priv: the driver's private data structure
- *
- * Disable the device for buffered dumping. Stops new DMA transactions from
- * being generated, waits for all outstanding DMA to complete, and then frees
- * all buffers.
- *
- * LOCKING: must hold dev->mutex
- * CONTEXT: user only
- *
- * Returns 0 on success, -ERRNO otherwise
- */
-static int data_device_disable(struct fpga_device *priv)
-{
-	spin_lock_irq(&priv->lock);
-
-	/* allow multiple disable */
-	if (!priv->enabled) {
-		spin_unlock_irq(&priv->lock);
-		return 0;
-	}
-
-	/*
-	 * Mark the device disabled
-	 *
-	 * This stops DMA callbacks from re-enabling interrupts
-	 */
-	priv->enabled = false;
-
-	/* prevent the FPGAs from generating interrupts */
-	data_disable_interrupts(priv);
-
-	/* wait until all ongoing DMA has finished */
-	while (priv->inflight != NULL) {
-		spin_unlock_irq(&priv->lock);
-		wait_event(priv->wait, priv->inflight == NULL);
-		spin_lock_irq(&priv->lock);
-	}
-
-	spin_unlock_irq(&priv->lock);
-
-	/* unhook the irq handler */
-	free_irq(priv->irq, priv);
-
-	/* free the correlation table */
-	sg_free_table(&priv->corl_table);
-	priv->corl_nents = 0;
-
-	/* free all buffers: the free and used lists are not being changed */
-	data_free_buffers(priv);
-	return 0;
-}
-
-/*
- * DEBUGFS Interface
- */
-#ifdef CONFIG_DEBUG_FS
-
-/*
- * Count the number of entries in the given list
- */
-static unsigned int list_num_entries(struct list_head *list)
-{
-	struct list_head *entry;
-	unsigned int ret = 0;
-
-	list_for_each(entry, list)
-		ret++;
-
-	return ret;
-}
-
-static int data_debug_show(struct seq_file *f, void *offset)
-{
-	struct fpga_device *priv = f->private;
-
-	spin_lock_irq(&priv->lock);
-
-	seq_printf(f, "enabled: %d\n", priv->enabled);
-	seq_printf(f, "bufsize: %d\n", priv->bufsize);
-	seq_printf(f, "num_buffers: %d\n", priv->num_buffers);
-	seq_printf(f, "num_free: %d\n", list_num_entries(&priv->free));
-	seq_printf(f, "inflight: %d\n", priv->inflight != NULL);
-	seq_printf(f, "num_used: %d\n", list_num_entries(&priv->used));
-	seq_printf(f, "num_dropped: %d\n", priv->num_dropped);
-
-	spin_unlock_irq(&priv->lock);
-	return 0;
-}
-
-static int data_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, data_debug_show, inode->i_private);
-}
-
-static const struct file_operations data_debug_fops = {
-	.owner		= THIS_MODULE,
-	.open		= data_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int data_debugfs_init(struct fpga_device *priv)
-{
-	priv->dbg_entry = debugfs_create_file(drv_name, S_IRUGO, NULL, priv,
-					      &data_debug_fops);
-	return PTR_ERR_OR_ZERO(priv->dbg_entry);
-}
-
-static void data_debugfs_exit(struct fpga_device *priv)
-{
-	debugfs_remove(priv->dbg_entry);
-}
-
-#else
-
-static inline int data_debugfs_init(struct fpga_device *priv)
-{
-	return 0;
-}
-
-static inline void data_debugfs_exit(struct fpga_device *priv)
-{
-}
-
-#endif	/* CONFIG_DEBUG_FS */
-
-/*
- * SYSFS Attributes
- */
-
-static ssize_t data_en_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct fpga_device *priv = dev_get_drvdata(dev);
-	int ret;
-
-	spin_lock_irq(&priv->lock);
-	ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
-	spin_unlock_irq(&priv->lock);
-
-	return ret;
-}
-
-static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t count)
-{
-	struct fpga_device *priv = dev_get_drvdata(dev);
-	unsigned long enable;
-	int ret;
-
-	ret = kstrtoul(buf, 0, &enable);
-	if (ret) {
-		dev_err(priv->dev, "unable to parse enable input\n");
-		return ret;
-	}
-
-	/* protect against concurrent enable/disable */
-	ret = mutex_lock_interruptible(&priv->mutex);
-	if (ret)
-		return ret;
-
-	if (enable)
-		ret = data_device_enable(priv);
-	else
-		ret = data_device_disable(priv);
-
-	if (ret) {
-		dev_err(priv->dev, "device %s failed\n",
-			enable ? "enable" : "disable");
-		count = ret;
-		goto out_unlock;
-	}
-
-out_unlock:
-	mutex_unlock(&priv->mutex);
-	return count;
-}
-
-static DEVICE_ATTR(enable, S_IWUSR | S_IRUGO, data_en_show, data_en_set);
-
-static struct attribute *data_sysfs_attrs[] = {
-	&dev_attr_enable.attr,
-	NULL,
-};
-
-static const struct attribute_group rt_sysfs_attr_group = {
-	.attrs = data_sysfs_attrs,
-};
-
-/*
- * FPGA Realtime Data Character Device
- */
-
-static int data_open(struct inode *inode, struct file *filp)
-{
-	/*
-	 * The miscdevice layer puts our struct miscdevice into the
-	 * filp->private_data field. We use this to find our private
-	 * data and then overwrite it with our own private structure.
-	 */
-	struct fpga_device *priv = container_of(filp->private_data,
-						struct fpga_device, miscdev);
-	struct fpga_reader *reader;
-	int ret;
-
-	/* allocate private data */
-	reader = kzalloc(sizeof(*reader), GFP_KERNEL);
-	if (!reader)
-		return -ENOMEM;
-
-	reader->priv = priv;
-	reader->buf = NULL;
-
-	filp->private_data = reader;
-	ret = nonseekable_open(inode, filp);
-	if (ret) {
-		dev_err(priv->dev, "nonseekable-open failed\n");
-		kfree(reader);
-		return ret;
-	}
-
-	/*
-	 * success, increase the reference count of the private data structure
-	 * so that it doesn't disappear if the device is unbound
-	 */
-	kref_get(&priv->ref);
-	return 0;
-}
-
-static int data_release(struct inode *inode, struct file *filp)
-{
-	struct fpga_reader *reader = filp->private_data;
-	struct fpga_device *priv = reader->priv;
-
-	/* free the per-reader structure */
-	data_free_buffer(reader->buf);
-	kfree(reader);
-	filp->private_data = NULL;
-
-	/* decrement our reference count to the private data */
-	kref_put(&priv->ref, fpga_device_release);
-	return 0;
-}
-
-static ssize_t data_read(struct file *filp, char __user *ubuf, size_t count,
-			 loff_t *f_pos)
-{
-	struct fpga_reader *reader = filp->private_data;
-	struct fpga_device *priv = reader->priv;
-	struct list_head *used = &priv->used;
-	bool drop_buffer = false;
-	struct data_buf *dbuf;
-	size_t avail;
-	void *data;
-	int ret;
-
-	/* check if we already have a partial buffer */
-	if (reader->buf) {
-		dbuf = reader->buf;
-		goto have_buffer;
-	}
-
-	spin_lock_irq(&priv->lock);
-
-	/* Block until there is at least one buffer on the used list */
-	while (list_empty(used)) {
-		spin_unlock_irq(&priv->lock);
-
-		if (filp->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		ret = wait_event_interruptible(priv->wait, !list_empty(used));
-		if (ret)
-			return ret;
-
-		spin_lock_irq(&priv->lock);
-	}
-
-	/* Grab the first buffer off of the used list */
-	dbuf = list_first_entry(used, struct data_buf, entry);
-	list_del_init(&dbuf->entry);
-
-	spin_unlock_irq(&priv->lock);
-
-	/* Buffers are always mapped: unmap it */
-	carma_dma_unmap(priv->dev, dbuf);
-
-	/* save the buffer for later */
-	reader->buf = dbuf;
-	reader->buf_start = 0;
-
-have_buffer:
-	/* Get the number of bytes available */
-	avail = dbuf->size - reader->buf_start;
-	data = dbuf->vaddr + reader->buf_start;
-
-	/* Get the number of bytes we can transfer */
-	count = min(count, avail);
-
-	/* Copy the data to the userspace buffer */
-	if (copy_to_user(ubuf, data, count))
-		return -EFAULT;
-
-	/* Update the amount of available space */
-	avail -= count;
-
-	/*
-	 * If there is still some data available, save the buffer for the
-	 * next userspace call to read() and return
-	 */
-	if (avail > 0) {
-		reader->buf_start += count;
-		reader->buf = dbuf;
-		return count;
-	}
-
-	/*
-	 * Get the buffer ready to be reused for DMA
-	 *
-	 * If it fails, we pretend that the read never happed and return
-	 * -EFAULT to userspace. The read will be retried.
-	 */
-	ret = carma_dma_map(priv->dev, dbuf);
-	if (ret) {
-		dev_err(priv->dev, "unable to remap buffer for DMA\n");
-		return -EFAULT;
-	}
-
-	/* Lock against concurrent enable/disable */
-	spin_lock_irq(&priv->lock);
-
-	/* the reader is finished with this buffer */
-	reader->buf = NULL;
-
-	/*
-	 * One of two things has happened, the device is disabled, or the
-	 * device has been reconfigured underneath us. In either case, we
-	 * should just throw away the buffer.
-	 *
-	 * Lockdep complains if this is done under the spinlock, so we
-	 * handle it during the unlock path.
-	 */
-	if (!priv->enabled || dbuf->size != priv->bufsize) {
-		drop_buffer = true;
-		goto out_unlock;
-	}
-
-	/* The buffer is safe to reuse, so add it back to the free list */
-	list_add_tail(&dbuf->entry, &priv->free);
-
-out_unlock:
-	spin_unlock_irq(&priv->lock);
-
-	if (drop_buffer) {
-		carma_dma_unmap(priv->dev, dbuf);
-		data_free_buffer(dbuf);
-	}
-
-	return count;
-}
-
-static unsigned int data_poll(struct file *filp, struct poll_table_struct *tbl)
-{
-	struct fpga_reader *reader = filp->private_data;
-	struct fpga_device *priv = reader->priv;
-	unsigned int mask = 0;
-
-	poll_wait(filp, &priv->wait, tbl);
-
-	if (!list_empty(&priv->used))
-		mask |= POLLIN | POLLRDNORM;
-
-	return mask;
-}
-
-static int data_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct fpga_reader *reader = filp->private_data;
-	struct fpga_device *priv = reader->priv;
-	unsigned long offset, vsize, psize, addr;
-
-	/* VMA properties */
-	offset = vma->vm_pgoff << PAGE_SHIFT;
-	vsize = vma->vm_end - vma->vm_start;
-	psize = priv->phys_size - offset;
-	addr = (priv->phys_addr + offset) >> PAGE_SHIFT;
-
-	/* Check against the FPGA region's physical memory size */
-	if (vsize > psize) {
-		dev_err(priv->dev, "requested mmap mapping too large\n");
-		return -EINVAL;
-	}
-
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	return io_remap_pfn_range(vma, vma->vm_start, addr, vsize,
-				  vma->vm_page_prot);
-}
-
-static const struct file_operations data_fops = {
-	.owner		= THIS_MODULE,
-	.open		= data_open,
-	.release	= data_release,
-	.read		= data_read,
-	.poll		= data_poll,
-	.mmap		= data_mmap,
-	.llseek		= no_llseek,
-};
-
-/*
- * OpenFirmware Device Subsystem
- */
-
-static bool dma_filter(struct dma_chan *chan, void *data)
-{
-	/*
-	 * DMA Channel #0 is used for the FPGA Programmer, so ignore it
-	 *
-	 * This probably won't survive an unload/load cycle of the Freescale
-	 * DMAEngine driver, but that won't be a problem
-	 */
-	if (chan->chan_id == 0 && chan->device->dev_id == 0)
-		return false;
-
-	return true;
-}
-
-static int data_of_probe(struct platform_device *op)
-{
-	struct device_node *of_node = op->dev.of_node;
-	struct device *this_device;
-	struct fpga_device *priv;
-	struct resource res;
-	dma_cap_mask_t mask;
-	int ret;
-
-	/* Allocate private data */
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&op->dev, "Unable to allocate device private data\n");
-		ret = -ENOMEM;
-		goto out_return;
-	}
-
-	platform_set_drvdata(op, priv);
-	priv->dev = &op->dev;
-	kref_init(&priv->ref);
-	mutex_init(&priv->mutex);
-
-	dev_set_drvdata(priv->dev, priv);
-	spin_lock_init(&priv->lock);
-	INIT_LIST_HEAD(&priv->free);
-	INIT_LIST_HEAD(&priv->used);
-	init_waitqueue_head(&priv->wait);
-
-	/* Setup the misc device */
-	priv->miscdev.minor = MISC_DYNAMIC_MINOR;
-	priv->miscdev.name = drv_name;
-	priv->miscdev.fops = &data_fops;
-
-	/* Get the physical address of the FPGA registers */
-	ret = of_address_to_resource(of_node, 0, &res);
-	if (ret) {
-		dev_err(&op->dev, "Unable to find FPGA physical address\n");
-		ret = -ENODEV;
-		goto out_free_priv;
-	}
-
-	priv->phys_addr = res.start;
-	priv->phys_size = resource_size(&res);
-
-	/* ioremap the registers for use */
-	priv->regs = of_iomap(of_node, 0);
-	if (!priv->regs) {
-		dev_err(&op->dev, "Unable to ioremap registers\n");
-		ret = -ENOMEM;
-		goto out_free_priv;
-	}
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
-	dma_cap_set(DMA_INTERRUPT, mask);
-	dma_cap_set(DMA_SLAVE, mask);
-	dma_cap_set(DMA_SG, mask);
-
-	/* Request a DMA channel */
-	priv->chan = dma_request_channel(mask, dma_filter, NULL);
-	if (!priv->chan) {
-		dev_err(&op->dev, "Unable to request DMA channel\n");
-		ret = -ENODEV;
-		goto out_unmap_regs;
-	}
-
-	/* Find the correct IRQ number */
-	priv->irq = irq_of_parse_and_map(of_node, 0);
-	if (priv->irq == NO_IRQ) {
-		dev_err(&op->dev, "Unable to find IRQ line\n");
-		ret = -ENODEV;
-		goto out_release_dma;
-	}
-
-	/* Drive the GPIO for FPGA IRQ high (no interrupt) */
-	iowrite32be(IRQ_CORL_DONE, priv->regs + SYS_IRQ_OUTPUT_DATA);
-
-	/* Register the miscdevice */
-	ret = misc_register(&priv->miscdev);
-	if (ret) {
-		dev_err(&op->dev, "Unable to register miscdevice\n");
-		goto out_irq_dispose_mapping;
-	}
-
-	/* Create the debugfs files */
-	ret = data_debugfs_init(priv);
-	if (ret) {
-		dev_err(&op->dev, "Unable to create debugfs files\n");
-		goto out_misc_deregister;
-	}
-
-	/* Create the sysfs files */
-	this_device = priv->miscdev.this_device;
-	dev_set_drvdata(this_device, priv);
-	ret = sysfs_create_group(&this_device->kobj, &rt_sysfs_attr_group);
-	if (ret) {
-		dev_err(&op->dev, "Unable to create sysfs files\n");
-		goto out_data_debugfs_exit;
-	}
-
-	dev_info(&op->dev, "CARMA FPGA Realtime Data Driver Loaded\n");
-	return 0;
-
-out_data_debugfs_exit:
-	data_debugfs_exit(priv);
-out_misc_deregister:
-	misc_deregister(&priv->miscdev);
-out_irq_dispose_mapping:
-	irq_dispose_mapping(priv->irq);
-out_release_dma:
-	dma_release_channel(priv->chan);
-out_unmap_regs:
-	iounmap(priv->regs);
-out_free_priv:
-	kref_put(&priv->ref, fpga_device_release);
-out_return:
-	return ret;
-}
-
-static int data_of_remove(struct platform_device *op)
-{
-	struct fpga_device *priv = platform_get_drvdata(op);
-	struct device *this_device = priv->miscdev.this_device;
-
-	/* remove all sysfs files, now the device cannot be re-enabled */
-	sysfs_remove_group(&this_device->kobj, &rt_sysfs_attr_group);
-
-	/* remove all debugfs files */
-	data_debugfs_exit(priv);
-
-	/* disable the device from generating data */
-	data_device_disable(priv);
-
-	/* remove the character device to stop new readers from appearing */
-	misc_deregister(&priv->miscdev);
-
-	/* cleanup everything not needed by readers */
-	irq_dispose_mapping(priv->irq);
-	dma_release_channel(priv->chan);
-	iounmap(priv->regs);
-
-	/* release our reference */
-	kref_put(&priv->ref, fpga_device_release);
-	return 0;
-}
-
-static const struct of_device_id data_of_match[] = {
-	{ .compatible = "carma,carma-fpga", },
-	{},
-};
-
-static struct platform_driver data_of_driver = {
-	.probe		= data_of_probe,
-	.remove		= data_of_remove,
-	.driver		= {
-		.name		= drv_name,
-		.of_match_table	= data_of_match,
-	},
-};
-
-module_platform_driver(data_of_driver);
-
-MODULE_AUTHOR("Ira W. Snyder <iws@ovro.caltech.edu>");
-MODULE_DESCRIPTION("CARMA DATA-FPGA Access Driver");
-MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/cxl/Kconfig b/kernel/drivers/misc/cxl/Kconfig
index a990b39b4..8756d06e2 100644
--- a/kernel/drivers/misc/cxl/Kconfig
+++ b/kernel/drivers/misc/cxl/Kconfig
@@ -7,10 +7,20 @@ config CXL_BASE
 	default n
 	select PPC_COPRO_BASE
 
+config CXL_KERNEL_API
+	bool
+	default n
+
+config CXL_EEH
+	bool
+	default n
+
 config CXL
 	tristate "Support for IBM Coherent Accelerators (CXL)"
-	depends on PPC_POWERNV && PCI_MSI
+	depends on PPC_POWERNV && PCI_MSI && EEH
 	select CXL_BASE
+	select CXL_KERNEL_API
+	select CXL_EEH
 	default m
 	help
 	  Select this option to enable driver support for IBM Coherent
diff --git a/kernel/drivers/misc/cxl/Makefile b/kernel/drivers/misc/cxl/Makefile
index edb494d3f..6982f603f 100644
--- a/kernel/drivers/misc/cxl/Makefile
+++ b/kernel/drivers/misc/cxl/Makefile
@@ -1,4 +1,8 @@
-cxl-y				+= main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o
+ccflags-y := -Werror -Wno-unused-const-variable
+
+cxl-y				+= main.o file.o irq.o fault.o native.o
+cxl-y				+= context.o sysfs.o debugfs.o pci.o trace.o
+cxl-y				+= vphb.o api.o
 obj-$(CONFIG_CXL)		+= cxl.o
 obj-$(CONFIG_CXL_BASE)		+= base.o
 
diff --git a/kernel/drivers/misc/cxl/api.c b/kernel/drivers/misc/cxl/api.c
new file mode 100644
index 000000000..103baf0e0
--- /dev/null
+++ b/kernel/drivers/misc/cxl/api.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <misc/cxl.h>
+#include <linux/fs.h>
+
+#include "cxl.h"
+
+/* Create a kernel-API context on @dev's AFU; returns it or an ERR_PTR(). */
+struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
+{
+	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+	struct address_space *mapping;
+	struct cxl_context *ctx;
+	int rc;
+
+	/* Pin the AFU device; cxl_release_context() drops this reference */
+	get_device(&afu->dev);
+	ctx = cxl_context_alloc();
+	if (IS_ERR(ctx)) {
+		rc = PTR_ERR(ctx);
+		goto err_dev;
+	}
+
+	ctx->kernelapi = true;
+
+	/*
+	 * Make our own address space since we won't have one from the
+	 * filesystem like the user api has, and even if we do associate a file
+	 * with this context we don't want to use the global anonymous inode's
+	 * address space as that can invalidate unrelated users:
+	 */
+	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping) {
+		rc = -ENOMEM;
+		goto err_ctx;
+	}
+	address_space_init_once(mapping);
+
+	/* Make it a slave context.  We can promote it later? */
+	rc = cxl_context_init(ctx, afu, false, mapping);
+	if (rc)
+		goto err_mapping;
+
+	cxl_assign_psn_space(ctx);
+
+	return ctx;
+
+err_mapping:
+	kfree(mapping);
+err_ctx:
+	kfree(ctx);
+err_dev:
+	put_device(&afu->dev);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(cxl_dev_context_init);
+
+struct cxl_context *cxl_get_context(struct pci_dev *dev)
+{
+	return dev->dev.archdata.cxl_ctx;
+}
+EXPORT_SYMBOL_GPL(cxl_get_context);
+
+/*
+ * Return the parent device of the adapter that owns @dev's AFU (the AFU
+ * is looked up with cxl_pci_to_afu()).
+ */
+struct device *cxl_get_phys_dev(struct pci_dev *dev)
+{
+	return cxl_pci_to_afu(dev)->adapter->dev.parent;
+}
+EXPORT_SYMBOL_GPL(cxl_get_phys_dev);
+
+int cxl_release_context(struct cxl_context *ctx)
+{
+	if (ctx->status >= STARTED)
+		return -EBUSY;	/* refused for STARTED or any later status */
+
+	put_device(&ctx->afu->dev);	/* drop the AFU device reference */
+
+	cxl_context_free(ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_release_context);
+
+int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
+{
+	if (num == 0)	/* 0 means: use the AFU's pp_irqs count */
+		num = ctx->afu->pp_irqs;
+	return afu_allocate_irqs(ctx, num);
+}
+EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
+
+void cxl_free_afu_irqs(struct cxl_context *ctx)
+{
+	afu_irq_name_free(ctx);
+	cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+}
+EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
+
+/* Walk ctx's irq ranges to turn index @num into a hwirq; 0 if out of range */
+static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
+{
+	int idx;
+
+	WARN_ON(num == 0);
+
+	for (idx = 0; idx < CXL_IRQ_RANGES; idx++) {
+		__u16 span = ctx->irqs.range[idx];
+
+		if (num < span)
+			return ctx->irqs.offset[idx] + num;
+		num -= span;
+	}
+	return 0;
+}
+
+/*
+ * Hook @handler up to AFU interrupt @num of the context.  Returns -ENOENT
+ * when @num has no hwirq, otherwise whatever cxl_map_irq() returns.
+ */
+int cxl_map_afu_irq(struct cxl_context *ctx, int num,
+		    irq_handler_t handler, void *cookie, char *name)
+{
+	irq_hw_number_t hwirq = cxl_find_afu_irq(ctx, num);
+
+	/* cxl_find_afu_irq() reports an unknown interrupt as hwirq 0 */
+	if (!hwirq)
+		return -ENOENT;
+	return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
+}
+EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
+
+/* Unmap AFU irq @num; a no-op when it has no hwirq or no virq mapping */
+void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
+{
+	irq_hw_number_t hwirq = cxl_find_afu_irq(ctx, num);
+	unsigned int virq;
+
+	if (!hwirq)
+		return;
+	virq = irq_find_mapping(NULL, hwirq);
+	if (!virq)
+		return;
+	cxl_unmap_irq(virq, cookie);
+}
+EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
+
+/*
+ * Start a context; code here similar to afu_ioctl_start_work().
+ * A NULL @task attaches as a kernel context.  Returns 0 or the attach error.
+ */
+int cxl_start_context(struct cxl_context *ctx, u64 wed,
+		      struct task_struct *task)
+{
+	int rc = 0;
+	bool kernel = true;
+
+	pr_devel("%s: pe: %i\n", __func__, ctx->pe);
+
+	mutex_lock(&ctx->status_mutex);
+	if (ctx->status == STARTED)
+		goto out; /* already started */
+
+	if (task) {
+		/* get_task_pid() already hands back a counted reference */
+		ctx->pid = get_task_pid(task, PIDTYPE_PID);
+		kernel = false;
+	}
+
+	cxl_ctx_get();
+	rc = cxl_attach_process(ctx, kernel, wed, 0);
+	if (rc) {
+		put_pid(ctx->pid);
+		ctx->pid = NULL;	/* reference is gone */
+		cxl_ctx_put();
+		goto out;
+	}
+	ctx->status = STARTED;
+out:
+	mutex_unlock(&ctx->status_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_start_context);
+
+int cxl_process_element(struct cxl_context *ctx)
+{
+	return ctx->pe;
+}
+EXPORT_SYMBOL_GPL(cxl_process_element);
+
+/* Stop a context.  Returns 0 on success, otherwise -Errno */
+int cxl_stop_context(struct cxl_context *ctx)
+{
+	return __detach_context(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_stop_context);
+
+void cxl_set_master(struct cxl_context *ctx)
+{
+	ctx->master = true;
+	cxl_assign_psn_space(ctx);
+}
+EXPORT_SYMBOL_GPL(cxl_set_master);
+
+/* wrappers around afu_* file ops which are EXPORTED */
+int cxl_fd_open(struct inode *inode, struct file *file)
+{
+	return afu_open(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_open);
+int cxl_fd_release(struct inode *inode, struct file *file)
+{
+	return afu_release(inode, file);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_release);
+long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return afu_ioctl(file, cmd, arg);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
+int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	return afu_mmap(file, vm);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_mmap);
+unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
+{
+	return afu_poll(file, poll);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_poll);
+ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
+			loff_t *off)
+{
+	return afu_read(file, buf, count, off);
+}
+EXPORT_SYMBOL_GPL(cxl_fd_read);
+
+#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME
+
+/* Get a struct file and a reserved fd (via @fd) for a context; attach ops */
+struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
+			int *fd)
+{
+	struct file *file;
+	int rc, flags, fdtmp;
+
+	flags = O_RDWR | O_CLOEXEC;
+
+	/* This code is similar to anon_inode_getfd() */
+	rc = get_unused_fd_flags(flags);
+	if (rc < 0)
+		return ERR_PTR(rc);	/* no fd available: ERR_PTR() */
+	fdtmp = rc;
+
+	/*
+	 * Patch the file ops.  Needs to be careful that this is reentrant safe.
+	 */
+	if (fops) {	/* caller's NULL entries are filled from afu_fops */
+		PATCH_FOPS(open);
+		PATCH_FOPS(poll);
+		PATCH_FOPS(read);
+		PATCH_FOPS(release);
+		PATCH_FOPS(unlocked_ioctl);
+		PATCH_FOPS(compat_ioctl);
+		PATCH_FOPS(mmap);
+	} else /* use default ops */
+		fops = (struct file_operations *)&afu_fops;
+
+	file = anon_inode_getfile("cxl", fops, ctx, flags);
+	if (IS_ERR(file))
+		goto err_fd;
+
+	file->f_mapping = ctx->mapping;
+
+	*fd = fdtmp;	/* reserved only; nothing here installs the file */
+	return file;
+
+err_fd:
+	put_unused_fd(fdtmp);
+	return NULL;	/* NOTE(review): NULL here, ERR_PTR() above */
+}
+EXPORT_SYMBOL_GPL(cxl_get_fd);
+
+struct cxl_context *cxl_fops_get_context(struct file *file)
+{
+	return file->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_fops_get_context);
+
+/* Start @ctx as described by @work; code taken from afu_ioctl_start_work */
+int cxl_start_work(struct cxl_context *ctx,
+		   struct cxl_ioctl_start_work *work)
+{
+	struct cxl_afu *afu = ctx->afu;
+	int rc;
+
+	/* default the interrupt count, or range-check the caller's value */
+	if (!(work->flags & CXL_START_WORK_NUM_IRQS))
+		work->num_interrupts = afu->pp_irqs;
+	else if (work->num_interrupts < afu->pp_irqs ||
+		 work->num_interrupts > afu->irqs_max)
+		return -EINVAL;
+
+	rc = afu_register_irqs(ctx, work->num_interrupts);
+	if (rc)
+		return rc;
+
+	rc = cxl_start_context(ctx, work->work_element_descriptor, current);
+	if (rc >= 0)
+		return 0;
+
+	afu_release_irqs(ctx, ctx);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_start_work);
+
+/* ioremap() ctx's psn window; NULL when cxl_afu_check_and_enable() fails */
+void __iomem *cxl_psa_map(struct cxl_context *ctx)
+{
+	int rc = cxl_afu_check_and_enable(ctx->afu);
+
+	if (rc)
+		return NULL;
+
+	/* log the window that is actually handed to ioremap() below */
+	pr_devel("%s: psn_phys%llx size:%llx\n",
+		 __func__, ctx->psn_phys, ctx->psn_size);
+	return ioremap(ctx->psn_phys, ctx->psn_size);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_map);
+
+void cxl_psa_unmap(void __iomem *addr)
+{
+	iounmap(addr);
+}
+EXPORT_SYMBOL_GPL(cxl_psa_unmap);
+
+/* Reset ctx's AFU, then run cxl_afu_check_and_enable(); first error wins */
+int cxl_afu_reset(struct cxl_context *ctx)
+{
+	struct cxl_afu *afu = ctx->afu;
+	int rc = __cxl_afu_reset(afu);
+
+	if (!rc)
+		rc = cxl_afu_check_and_enable(afu);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_afu_reset);
+
+void cxl_perst_reloads_same_image(struct cxl_afu *afu,
+				  bool perst_reloads_same_image)
+{
+	afu->adapter->perst_same_image = perst_reloads_same_image;
+}
+EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
diff --git a/kernel/drivers/misc/cxl/base.c b/kernel/drivers/misc/cxl/base.c
index 0654ad836..a9f0dd325 100644
--- a/kernel/drivers/misc/cxl/base.c
+++ b/kernel/drivers/misc/cxl/base.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <asm/errno.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 #include "cxl.h"
 
 /* protected by rcu */
diff --git a/kernel/drivers/misc/cxl/context.c b/kernel/drivers/misc/cxl/context.c
index e4dc8cdf6..2faa1270d 100644
--- a/kernel/drivers/misc/cxl/context.c
+++ b/kernel/drivers/misc/cxl/context.c
@@ -126,6 +126,18 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (ctx->status != STARTED) {
 		mutex_unlock(&ctx->status_mutex);
 		pr_devel("%s: Context not started, failing problem state access\n", __func__);
+		if (ctx->mmio_err_ff) {
+			if (!ctx->ff_page) {
+				ctx->ff_page = alloc_page(GFP_USER);
+				if (!ctx->ff_page)
+					return VM_FAULT_OOM;
+				memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE);
+			}
+			get_page(ctx->ff_page);
+			vmf->page = ctx->ff_page;
+			vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+			return 0;
+		}
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -145,8 +157,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = {
  */
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
 {
+	u64 start = vma->vm_pgoff << PAGE_SHIFT;
 	u64 len = vma->vm_end - vma->vm_start;
-	len = min(len, ctx->psn_size);
+
+	if (ctx->afu->current_mode == CXL_MODE_DEDICATED) {
+		if (start + len > ctx->afu->adapter->ps_size)
+			return -EINVAL;
+	} else {
+		if (start + len > ctx->psn_size)
+			return -EINVAL;
+	}
 
 	if (ctx->afu->current_mode != CXL_MODE_DEDICATED) {
 		/* make sure there is a valid per process space for this AFU */
@@ -174,7 +194,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
  * return until all outstanding interrupts for this context have completed. The
  * hardware should no longer access *ctx after this has returned.
  */
-static void __detach_context(struct cxl_context *ctx)
+int __detach_context(struct cxl_context *ctx)
 {
 	enum cxl_context_status status;
 
@@ -183,12 +203,17 @@ static void __detach_context(struct cxl_context *ctx)
 	ctx->status = CLOSED;
 	mutex_unlock(&ctx->status_mutex);
 	if (status != STARTED)
-		return;
+		return -EBUSY;
 
-	WARN_ON(cxl_detach_process(ctx));
-	afu_release_irqs(ctx);
+	/* Only warn if we detached while the link was OK.
+	 * If detach fails when hw is down, we don't care.
+	 */
+	WARN_ON(cxl_detach_process(ctx) &&
+		cxl_adapter_link_ok(ctx->afu->adapter));
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
-	wake_up_all(&ctx->wq);
+	put_pid(ctx->pid);
+	cxl_ctx_put();
+	return 0;
 }
 
 /*
@@ -199,7 +224,14 @@ static void __detach_context(struct cxl_context *ctx)
  */
 void cxl_context_detach(struct cxl_context *ctx)
 {
-	__detach_context(ctx);
+	int rc;
+
+	rc = __detach_context(ctx);
+	if (rc)
+		return;
+
+	afu_release_irqs(ctx, ctx);
+	wake_up_all(&ctx->wq);
 }
 
 /*
@@ -216,7 +248,7 @@ void cxl_context_detach_all(struct cxl_afu *afu)
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
-		__detach_context(ctx);
+		cxl_context_detach(ctx);
 
 		/*
 		 * We are force detaching - remove any active PSA mappings so
@@ -232,16 +264,27 @@ void cxl_context_detach_all(struct cxl_afu *afu)
 	mutex_unlock(&afu->contexts_lock);
 }
 
-void cxl_context_free(struct cxl_context *ctx)
+static void reclaim_ctx(struct rcu_head *rcu)
 {
-	mutex_lock(&ctx->afu->contexts_lock);
-	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	mutex_unlock(&ctx->afu->contexts_lock);
-	synchronize_rcu();
+	struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
 	free_page((u64)ctx->sstp);
+	if (ctx->ff_page)
+		__free_page(ctx->ff_page);
 	ctx->sstp = NULL;
+	if (ctx->kernelapi)
+		kfree(ctx->mapping);
+
+	if (ctx->irq_bitmap)
+		kfree(ctx->irq_bitmap);
 
-	put_pid(ctx->pid);
 	kfree(ctx);
 }
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+	mutex_lock(&ctx->afu->contexts_lock);
+	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+	mutex_unlock(&ctx->afu->contexts_lock);
+	call_rcu(&ctx->rcu, reclaim_ctx);
+}
diff --git a/kernel/drivers/misc/cxl/cxl.h b/kernel/drivers/misc/cxl/cxl.h
index a1cee4767..0cfb9c129 100644
--- a/kernel/drivers/misc/cxl/cxl.h
+++ b/kernel/drivers/misc/cxl/cxl.h
@@ -18,10 +18,11 @@
 #include <linux/pid.h>
 #include <linux/io.h>
 #include <linux/pci.h>
+#include <linux/fs.h>
 #include <asm/cputable.h>
 #include <asm/mmu.h>
 #include <asm/reg.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include <uapi/misc/cxl.h>
 
@@ -33,7 +34,7 @@ extern uint cxl_verbose;
  * Bump version each time a user API change is made, whether it is
  * backwards compatible ot not.
  */
-#define CXL_API_VERSION 1
+#define CXL_API_VERSION 2
 #define CXL_API_VERSION_COMPATIBLE 1
 
 /*
@@ -82,8 +83,10 @@ static const cxl_p1_reg_t CXL_PSL_AFUSEL  = {0x00B0};
 /* 0x00C0:7EFF Implementation dependent area */
 static const cxl_p1_reg_t CXL_PSL_FIR1      = {0x0100};
 static const cxl_p1_reg_t CXL_PSL_FIR2      = {0x0108};
+static const cxl_p1_reg_t CXL_PSL_Timebase  = {0x0110};
 static const cxl_p1_reg_t CXL_PSL_VERSION   = {0x0118};
 static const cxl_p1_reg_t CXL_PSL_RESLCKTO  = {0x0128};
+static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140};
 static const cxl_p1_reg_t CXL_PSL_FIR_CNTL  = {0x0148};
 static const cxl_p1_reg_t CXL_PSL_DSNDCTL   = {0x0150};
 static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158};
@@ -151,6 +154,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL_SPAP_Size_Shift 4
 #define CXL_PSL_SPAP_V    0x0000000000000001ULL
 
+/****** CXL_PSL_Control ****************************************************/
+#define CXL_PSL_Control_tb 0x0000000000000001ULL
+
 /****** CXL_PSL_DLCNTL *****************************************************/
 #define CXL_PSL_DLCNTL_D (0x1ull << (63-28))
 #define CXL_PSL_DLCNTL_C (0x1ull << (63-29))
@@ -315,8 +321,6 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_MAX_SLICES 4
 #define MAX_AFU_MMIO_REGS 3
 
-#define CXL_MODE_DEDICATED   0x1
-#define CXL_MODE_DIRECTED    0x2
 #define CXL_MODE_TIME_SLICED 0x4
 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED)
 
@@ -362,6 +366,10 @@ struct cxl_afu {
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
+	/* AFU error buffer fields and bin attribute for sysfs */
+	u64 eb_len, eb_offset;
+	struct bin_attribute attr_eb;
+
 	/*
 	 * Only the first part of the SPA is used for the process element
 	 * linked list. The only other part that software needs to worry about
@@ -375,6 +383,9 @@ struct cxl_afu {
 	int spa_max_procs;
 	unsigned int psl_virq;
 
+	/* pointer to the vphb */
+	struct pci_controller *phb;
+
 	int pp_irqs;
 	int irqs_max;
 	int num_procs;
@@ -412,6 +423,9 @@ struct cxl_context {
 	/* Used to unmap any mmaps when force detaching */
 	struct address_space *mapping;
 	struct mutex mapping_lock;
+	struct page *ff_page;
+	bool mmio_err_ff;
+	bool kernelapi;
 
 	spinlock_t sste_lock; /* Protects segment table entries */
 	struct cxl_sste *sstp;
@@ -455,6 +469,8 @@ struct cxl_context {
 	bool pending_irq;
 	bool pending_fault;
 	bool pending_afu_err;
+
+	struct rcu_head rcu;
 };
 
 struct cxl {
@@ -485,6 +501,7 @@ struct cxl {
 	bool user_image_loaded;
 	bool perst_loads_image;
 	bool perst_select_user;
+	bool perst_same_image;
 };
 
 int cxl_alloc_one_irq(struct cxl *adapter);
@@ -523,16 +540,33 @@ struct cxl_process_element {
 	__be32 software_state;
 } __packed;
 
+/* True while the PCI device that parents this adapter is not offline. */
+static inline bool cxl_adapter_link_ok(struct cxl *cxl)
+{
+	struct pci_dev *parent = to_pci_dev(cxl->dev.parent);
+
+	return !pci_channel_offline(parent);
+}
+
 static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
 {
 	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
 	return cxl->p1_mmio + cxl_reg_off(reg);
 }
 
-#define cxl_p1_write(cxl, reg, val) \
-	out_be64(_cxl_p1_addr(cxl, reg), val)
-#define cxl_p1_read(cxl, reg) \
-	in_be64(_cxl_p1_addr(cxl, reg))
+static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(cxl)))
+		out_be64(_cxl_p1_addr(cxl, reg), val);
+}
+
+/* Read a P1 register; yields ~0ULL without MMIO when the link is down. */
+static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
+{
+	if (unlikely(!cxl_adapter_link_ok(cxl)))
+		return ~0ULL;
+	return in_be64(_cxl_p1_addr(cxl, reg));
+}
 
 static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
 {
@@ -540,29 +574,62 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg
 	return afu->p1n_mmio + cxl_reg_off(reg);
 }
 
-#define cxl_p1n_write(afu, reg, val) \
-	out_be64(_cxl_p1n_addr(afu, reg), val)
-#define cxl_p1n_read(afu, reg) \
-	in_be64(_cxl_p1n_addr(afu, reg))
+static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter)))
+		out_be64(_cxl_p1n_addr(afu, reg), val);
+}
+
+/* Read a P1n register; yields ~0ULL without MMIO when the link is down. */
+static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
+{
+	if (unlikely(!cxl_adapter_link_ok(afu->adapter)))
+		return ~0ULL;
+	return in_be64(_cxl_p1n_addr(afu, reg));
+}
 
 static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg)
 {
 	return afu->p2n_mmio + cxl_reg_off(reg);
 }
 
-#define cxl_p2n_write(afu, reg, val) \
-	out_be64(_cxl_p2n_addr(afu, reg), val)
-#define cxl_p2n_read(afu, reg) \
-	in_be64(_cxl_p2n_addr(afu, reg))
+static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val)
+{
+	if (likely(cxl_adapter_link_ok(afu->adapter)))
+		out_be64(_cxl_p2n_addr(afu, reg), val);
+}
+
+/* Read a P2n register; yields ~0ULL without MMIO when the link is down. */
+static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
+{
+	if (unlikely(!cxl_adapter_link_ok(afu->adapter)))
+		return ~0ULL;
+	return in_be64(_cxl_p2n_addr(afu, reg));
+}
 
+/* 64-bit LE read at crs_offset + cr * crs_len + off; ~0 when link is down. */
+static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off)
+{
+	if (unlikely(!cxl_adapter_link_ok(afu->adapter)))
+		return ~0ULL;
+	return in_le64(afu->afu_desc_mmio + afu->crs_offset +
+		       (cr * afu->crs_len) + off);
+}
 
-#define cxl_afu_cr_read64(afu, cr, off) \
-	in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off))
-#define cxl_afu_cr_read32(afu, cr, off) \
-	in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off))
+/* 32-bit LE read at crs_offset + cr * crs_len + off; ~0 when link is down. */
+static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off)
+{
+	if (unlikely(!cxl_adapter_link_ok(afu->adapter)))
+		return 0xffffffff;
+	return in_le32(afu->afu_desc_mmio + afu->crs_offset +
+		       (cr * afu->crs_len) + off);
+}
 u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
 u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
 
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+				loff_t off, size_t count);
+
 
 struct cxl_calls {
 	void (*cxl_slbia)(struct mm_struct *mm);
@@ -574,6 +641,9 @@ void unregister_cxl_calls(struct cxl_calls *calls);
 int cxl_alloc_adapter_nr(struct cxl *adapter);
 void cxl_remove_adapter_nr(struct cxl *adapter);
 
+int cxl_alloc_spa(struct cxl_afu *afu);
+void cxl_release_spa(struct cxl_afu *afu);
+
 int cxl_file_init(void);
 void cxl_file_exit(void);
 int cxl_register_adapter(struct cxl *adapter);
@@ -606,7 +676,8 @@ void cxl_release_psl_err_irq(struct cxl *adapter);
 int cxl_register_serr_irq(struct cxl_afu *afu);
 void cxl_release_serr_irq(struct cxl_afu *afu);
 int afu_register_irqs(struct cxl_context *ctx, u32 count);
-void afu_release_irqs(struct cxl_context *ctx);
+void afu_release_irqs(struct cxl_context *ctx, void *cookie);
+void afu_irq_name_free(struct cxl_context *ctx);
 irqreturn_t cxl_slice_irq_err(int irq, void *data);
 
 int cxl_debugfs_init(void);
@@ -629,6 +700,10 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
 		     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
+unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
+			 irq_handler_t handler, void *cookie, const char *name);
+void cxl_unmap_irq(unsigned int virq, void *cookie);
+int __detach_context(struct cxl_context *ctx);
 
 /* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */
 struct cxl_irq_info {
@@ -642,6 +717,7 @@ struct cxl_irq_info {
 	u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */
 };
 
+void cxl_assign_psn_space(struct cxl_context *ctx);
 int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
 			    u64 amr);
 int cxl_detach_process(struct cxl_context *ctx);
@@ -653,11 +729,24 @@ int cxl_check_error(struct cxl_afu *afu);
 int cxl_afu_slbia(struct cxl_afu *afu);
 int cxl_tlb_slb_invalidate(struct cxl *adapter);
 int cxl_afu_disable(struct cxl_afu *afu);
-int cxl_afu_reset(struct cxl_afu *afu);
+int __cxl_afu_reset(struct cxl_afu *afu);
+int cxl_afu_check_and_enable(struct cxl_afu *afu);
 int cxl_psl_purge(struct cxl_afu *afu);
 
 void cxl_stop_trace(struct cxl *cxl);
+int cxl_pci_vphb_add(struct cxl_afu *afu);
+void cxl_pci_vphb_reconfigure(struct cxl_afu *afu);
+void cxl_pci_vphb_remove(struct cxl_afu *afu);
 
 extern struct pci_driver cxl_pci_driver;
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count);
+
+int afu_open(struct inode *inode, struct file *file);
+int afu_release(struct inode *inode, struct file *file);
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int afu_mmap(struct file *file, struct vm_area_struct *vm);
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll);
+ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off);
+extern const struct file_operations afu_fops;
 
 #endif
diff --git a/kernel/drivers/misc/cxl/debugfs.c b/kernel/drivers/misc/cxl/debugfs.c
index 825c41258..18df6f44a 100644
--- a/kernel/drivers/misc/cxl/debugfs.c
+++ b/kernel/drivers/misc/cxl/debugfs.c
@@ -48,7 +48,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x
 static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode,
 					    struct dentry *parent, u64 __iomem *value)
 {
-	return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64);
+	return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64);
 }
 
 int cxl_debugfs_adapter_add(struct cxl *adapter)
diff --git a/kernel/drivers/misc/cxl/fault.c b/kernel/drivers/misc/cxl/fault.c
index 5286b8b70..25a5418c5 100644
--- a/kernel/drivers/misc/cxl/fault.c
+++ b/kernel/drivers/misc/cxl/fault.c
@@ -172,8 +172,8 @@ void cxl_handle_fault(struct work_struct *fault_work)
 		container_of(fault_work, struct cxl_context, fault_work);
 	u64 dsisr = ctx->dsisr;
 	u64 dar = ctx->dar;
-	struct task_struct *task;
-	struct mm_struct *mm;
+	struct task_struct *task = NULL;
+	struct mm_struct *mm = NULL;
 
 	if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
 	    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
@@ -194,17 +194,19 @@ void cxl_handle_fault(struct work_struct *fault_work)
 	pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
 		"DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
 
-	if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
-		pr_devel("cxl_handle_fault unable to get task %i\n",
-			 pid_nr(ctx->pid));
-		cxl_ack_ae(ctx);
-		return;
-	}
-	if (!(mm = get_task_mm(task))) {
-		pr_devel("cxl_handle_fault unable to get mm %i\n",
-			 pid_nr(ctx->pid));
-		cxl_ack_ae(ctx);
-		goto out;
+	if (!ctx->kernel) {
+		if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
+			pr_devel("cxl_handle_fault unable to get task %i\n",
+				 pid_nr(ctx->pid));
+			cxl_ack_ae(ctx);
+			return;
+		}
+		if (!(mm = get_task_mm(task))) {
+			pr_devel("cxl_handle_fault unable to get mm %i\n",
+				 pid_nr(ctx->pid));
+			cxl_ack_ae(ctx);
+			goto out;
+		}
 	}
 
 	if (dsisr & CXL_PSL_DSISR_An_DS)
@@ -214,9 +216,11 @@ void cxl_handle_fault(struct work_struct *fault_work)
 	else
 		WARN(1, "cxl_handle_fault has nothing to handle\n");
 
-	mmput(mm);
+	if (mm)
+		mmput(mm);
 out:
-	put_task_struct(task);
+	if (task)
+		put_task_struct(task);
 }
 
 static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
diff --git a/kernel/drivers/misc/cxl/file.c b/kernel/drivers/misc/cxl/file.c
index 2364bcadb..7ccd2998b 100644
--- a/kernel/drivers/misc/cxl/file.c
+++ b/kernel/drivers/misc/cxl/file.c
@@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
 	if (!afu->current_mode)
 		goto err_put_afu;
 
+	if (!cxl_adapter_link_ok(adapter)) {
+		rc = -EIO;
+		goto err_put_afu;
+	}
+
 	if (!(ctx = cxl_context_alloc())) {
 		rc = -ENOMEM;
 		goto err_put_afu;
@@ -96,7 +101,8 @@ err_put_adapter:
 	put_device(&adapter->dev);
 	return rc;
 }
-static int afu_open(struct inode *inode, struct file *file)
+
+int afu_open(struct inode *inode, struct file *file)
 {
 	return __afu_open(inode, file, false);
 }
@@ -106,7 +112,7 @@ static int afu_master_open(struct inode *inode, struct file *file)
 	return __afu_open(inode, file, true);
 }
 
-static int afu_release(struct inode *inode, struct file *file)
+int afu_release(struct inode *inode, struct file *file)
 {
 	struct cxl_context *ctx = file->private_data;
 
@@ -114,9 +120,16 @@ static int afu_release(struct inode *inode, struct file *file)
 		 __func__, ctx->pe);
 	cxl_context_detach(ctx);
 
-	mutex_lock(&ctx->mapping_lock);
-	ctx->mapping = NULL;
-	mutex_unlock(&ctx->mapping_lock);
+
+	/*
+	 * Delete the context's mapping pointer, unless it's created by the
+	 * kernel API, in which case leave it so it can be freed by reclaim_ctx()
+	 */
+	if (!ctx->kernelapi) {
+		mutex_lock(&ctx->mapping_lock);
+		ctx->mapping = NULL;
+		mutex_unlock(&ctx->mapping_lock);
+	}
 
 	put_device(&ctx->afu->dev);
 
@@ -128,7 +141,6 @@ static int afu_release(struct inode *inode, struct file *file)
 	 */
 	cxl_context_free(ctx);
 
-	cxl_ctx_put();
 	return 0;
 }
 
@@ -179,6 +191,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	if (work.flags & CXL_START_WORK_AMR)
 		amr = work.amr & mfspr(SPRN_UAMOR);
 
+	ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
+
 	/*
 	 * We grab the PID here and not in the file open to allow for the case
 	 * where a process (master, some daemon, etc) has opened the chardev on
@@ -191,7 +205,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
 	if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor,
 				     amr))) {
-		afu_release_irqs(ctx);
+		afu_release_irqs(ctx, ctx);
 		goto out;
 	}
 
@@ -212,19 +226,44 @@ static long afu_ioctl_process_element(struct cxl_context *ctx,
 	return 0;
 }
 
-static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+/* Fill a struct cxl_afu_id from the context's AFU and copy it to userspace. */
+static long afu_ioctl_get_afu_id(struct cxl_context *ctx,
+				 struct cxl_afu_id __user *upafuid)
+{
+	struct cxl_afu *afu = ctx->afu;
+	struct cxl_afu_id afuid = { 0 };
+
+	afuid.card_id = afu->adapter->adapter_num;
+	afuid.afu_offset = afu->slice;
+	afuid.afu_mode = afu->current_mode;
+
+	/* a non-master context on a directed-mode AFU is flagged as a slave */
+	if (afu->current_mode == CXL_MODE_DIRECTED && !ctx->master)
+		afuid.flags |= CXL_AFUID_FLAG_SLAVE;
+	if (copy_to_user(upafuid, &afuid, sizeof(afuid)))
+		return -EFAULT;
+
+	return 0;
+}
+
+long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct cxl_context *ctx = file->private_data;
 
 	if (ctx->status == CLOSED)
 		return -EIO;
 
+	if (!cxl_adapter_link_ok(ctx->afu->adapter))
+		return -EIO;
+
 	pr_devel("afu_ioctl\n");
 	switch (cmd) {
 	case CXL_IOCTL_START_WORK:
 		return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg);
 	case CXL_IOCTL_GET_PROCESS_ELEMENT:
 		return afu_ioctl_process_element(ctx, (__u32 __user *)arg);
+	case CXL_IOCTL_GET_AFU_ID:
+		return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *)
+					    arg);
 	}
 	return -EINVAL;
 }
@@ -235,7 +274,7 @@ static long afu_compat_ioctl(struct file *file, unsigned int cmd,
 	return afu_ioctl(file, cmd, arg);
 }
 
-static int afu_mmap(struct file *file, struct vm_area_struct *vm)
+int afu_mmap(struct file *file, struct vm_area_struct *vm)
 {
 	struct cxl_context *ctx = file->private_data;
 
@@ -243,10 +282,13 @@ static int afu_mmap(struct file *file, struct vm_area_struct *vm)
 	if (ctx->status != STARTED)
 		return -EIO;
 
+	if (!cxl_adapter_link_ok(ctx->afu->adapter))
+		return -EIO;
+
 	return cxl_context_iomap(ctx, vm);
 }
 
-static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
+unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
 {
 	struct cxl_context *ctx = file->private_data;
 	int mask = 0;
@@ -278,7 +320,7 @@ static inline int ctx_event_pending(struct cxl_context *ctx)
 	    ctx->pending_afu_err || (ctx->status == CLOSED));
 }
 
-static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
+ssize_t afu_read(struct file *file, char __user *buf, size_t count,
 			loff_t *off)
 {
 	struct cxl_context *ctx = file->private_data;
@@ -287,6 +329,9 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
 	int rc;
 	DEFINE_WAIT(wait);
 
+	if (!cxl_adapter_link_ok(ctx->afu->adapter))
+		return -EIO;
+
 	if (count < CXL_READ_MIN_SIZE)
 		return -EINVAL;
 
@@ -297,6 +342,11 @@ static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
 		if (ctx_event_pending(ctx))
 			break;
 
+		if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+			rc = -EIO;
+			goto out;
+		}
+
 		if (file->f_flags & O_NONBLOCK) {
 			rc = -EAGAIN;
 			goto out;
@@ -359,7 +409,11 @@ out:
 	return rc;
 }
 
-static const struct file_operations afu_fops = {
+/*
+ * Note: if this is updated, api.c needs updating as well so that the new
+ * operations get patched in there too.
+ */
+const struct file_operations afu_fops = {
 	.owner		= THIS_MODULE,
 	.open           = afu_open,
 	.poll		= afu_poll,
@@ -493,7 +547,7 @@ int __init cxl_file_init(void)
 	 * If these change we really need to update API.  Either change some
 	 * flags or update API version number CXL_API_VERSION.
 	 */
-	BUILD_BUG_ON(CXL_API_VERSION != 1);
+	BUILD_BUG_ON(CXL_API_VERSION != 2);
 	BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64);
 	BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8);
 	BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8);
diff --git a/kernel/drivers/misc/cxl/irq.c b/kernel/drivers/misc/cxl/irq.c
index c8929c526..09a406058 100644
--- a/kernel/drivers/misc/cxl/irq.c
+++ b/kernel/drivers/misc/cxl/irq.c
@@ -14,7 +14,7 @@
 #include <linux/slab.h>
 #include <linux/pid.h>
 #include <asm/cputable.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
@@ -30,12 +30,12 @@ static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u6
 	serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An);
 	afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An);
 
-	dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat);
-	dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%.16llx\n", fir1);
-	dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%.16llx\n", fir2);
-	dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr);
-	dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice);
-	dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug);
+	dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat);
+	dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1);
+	dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2);
+	dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+	dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+	dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
 
 	dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n");
 	cxl_stop_trace(ctx->afu->adapter);
@@ -54,10 +54,10 @@ irqreturn_t cxl_slice_irq_err(int irq, void *data)
 	fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An);
 	errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
 	afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An);
-	dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr);
-	dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%.16llx\n", fir_slice);
-	dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%.16llx\n", errstat);
-	dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%.16llx\n", afu_debug);
+	dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr);
+	dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice);
+	dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat);
+	dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug);
 
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
 
@@ -72,7 +72,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data)
 	WARN(1, "CXL ERROR interrupt %i\n", irq);
 
 	err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE);
-	dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%.16llx\n", err_ivte);
+	dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte);
 
 	dev_crit(&adapter->dev, "STOPPING CXL TRACE\n");
 	cxl_stop_trace(adapter);
@@ -80,7 +80,7 @@ static irqreturn_t cxl_irq_err(int irq, void *data)
 	fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1);
 	fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2);
 
-	dev_crit(&adapter->dev, "PSL_FIR1: 0x%.16llx\nPSL_FIR2: 0x%.16llx\n", fir1, fir2);
+	dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2);
 
 	return IRQ_HANDLED;
 }
@@ -147,7 +147,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
 	if (dsisr & CXL_PSL_DSISR_An_PE)
 		return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
 	if (dsisr & CXL_PSL_DSISR_An_AE) {
-		pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err);
+		pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err);
 
 		if (ctx->pending_afu_err) {
 			/*
@@ -158,7 +158,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
 			 * probably best that we log them somewhere:
 			 */
 			dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
-					    "undelivered to pe %i: %.llx\n",
+					    "undelivered to pe %i: 0x%016llx\n",
 					    ctx->pe, irq_info->afu_err);
 		} else {
 			spin_lock(&ctx->lock);
@@ -211,8 +211,8 @@ static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
 	}
 	rcu_read_unlock();
 
-	WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR"
-		" %.16llx\n(Possible AFU HW issue - was a term/remove acked"
+	WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR"
+		" %016llx\n(Possible AFU HW issue - was a term/remove acked"
 		" with outstanding transactions?)\n", ph, irq_info.dsisr,
 		irq_info.dar);
 	return fail_psl_irq(afu, &irq_info);
@@ -341,6 +341,9 @@ int cxl_register_psl_err_irq(struct cxl *adapter)
 
 void cxl_release_psl_err_irq(struct cxl *adapter)
 {
+	if (!adapter->err_virq ||
+	    adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq))
+		return;
 	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
 	cxl_unmap_irq(adapter->err_virq, adapter);
 	cxl_release_one_irq(adapter, adapter->err_hwirq);
@@ -374,6 +377,9 @@ int cxl_register_serr_irq(struct cxl_afu *afu)
 
 void cxl_release_serr_irq(struct cxl_afu *afu)
 {
+	if (!afu->serr_virq ||
+	    afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq))
+		return;
 	cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000);
 	cxl_unmap_irq(afu->serr_virq, afu);
 	cxl_release_one_irq(afu->adapter, afu->serr_hwirq);
@@ -400,6 +406,9 @@ int cxl_register_psl_irq(struct cxl_afu *afu)
 
 void cxl_release_psl_irq(struct cxl_afu *afu)
 {
+	if (!afu->psl_virq ||
+	    afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq))
+		return;
 	cxl_unmap_irq(afu->psl_virq, afu);
 	cxl_release_one_irq(afu->adapter, afu->psl_hwirq);
 	kfree(afu->psl_irq_name);
@@ -416,12 +425,14 @@ void afu_irq_name_free(struct cxl_context *ctx)
 	}
 }
 
-int afu_register_irqs(struct cxl_context *ctx, u32 count)
+int afu_allocate_irqs(struct cxl_context *ctx, u32 count)
 {
-	irq_hw_number_t hwirq;
 	int rc, r, i, j = 1;
 	struct cxl_irq_name *irq_name;
 
+	/* Initialize the list head to hold irq names */
+	INIT_LIST_HEAD(&ctx->irq_names);
+
 	if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count)))
 		return rc;
 
@@ -433,13 +444,12 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
 	ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count),
 				  sizeof(*ctx->irq_bitmap), GFP_KERNEL);
 	if (!ctx->irq_bitmap)
-		return -ENOMEM;
+		goto out;
 
 	/*
 	 * Allocate names first.  If any fail, bail out before allocating
 	 * actual hardware IRQs.
 	 */
-	INIT_LIST_HEAD(&ctx->irq_names);
 	for (r = 1; r < CXL_IRQ_RANGES; r++) {
 		for (i = 0; i < ctx->irqs.range[r]; i++) {
 			irq_name = kmalloc(sizeof(struct cxl_irq_name),
@@ -458,6 +468,19 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
 			j++;
 		}
 	}
+	return 0;
+
+out:
+	cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+	afu_irq_name_free(ctx);
+	return -ENOMEM;
+}
+
+static void afu_register_hwirqs(struct cxl_context *ctx)
+{
+	irq_hw_number_t hwirq;
+	struct cxl_irq_name *irq_name;
+	int r, i;
 
 	/* We've allocated all memory now, so let's do the irq allocations */
 	irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list);
@@ -469,15 +492,21 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count)
 			irq_name = list_next_entry(irq_name, list);
 		}
 	}
+}
 
-	return 0;
+int afu_register_irqs(struct cxl_context *ctx, u32 count)
+{
+	int rc;
 
-out:
-	afu_irq_name_free(ctx);
-	return -ENOMEM;
-}
+	rc = afu_allocate_irqs(ctx, count);
+	if (rc)
+		return rc;
+
+	afu_register_hwirqs(ctx);
+	return 0;
+}
 
-void afu_release_irqs(struct cxl_context *ctx)
+void afu_release_irqs(struct cxl_context *ctx, void *cookie)
 {
 	irq_hw_number_t hwirq;
 	unsigned int virq;
@@ -488,10 +517,12 @@ void afu_release_irqs(struct cxl_context *ctx)
 		for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) {
 			virq = irq_find_mapping(NULL, hwirq);
 			if (virq)
-				cxl_unmap_irq(virq, ctx);
+				cxl_unmap_irq(virq, cookie);
 		}
 	}
 
 	afu_irq_name_free(ctx);
 	cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
+
+	ctx->irq_count = 0;
 }
diff --git a/kernel/drivers/misc/cxl/main.c b/kernel/drivers/misc/cxl/main.c
index de350dd46..9fde75ed4 100644
--- a/kernel/drivers/misc/cxl/main.c
+++ b/kernel/drivers/misc/cxl/main.c
@@ -20,7 +20,7 @@
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <asm/cputable.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
@@ -222,6 +222,7 @@ static void exit_cxl(void)
 	cxl_debugfs_exit();
 	cxl_file_exit();
 	unregister_cxl_calls(&cxl_calls);
+	idr_destroy(&cxl_adapter_idr);
 }
 
 module_init(init_cxl);
diff --git a/kernel/drivers/misc/cxl/native.c b/kernel/drivers/misc/cxl/native.c
index 29185fc61..f40909793 100644
--- a/kernel/drivers/misc/cxl/native.c
+++ b/kernel/drivers/misc/cxl/native.c
@@ -15,7 +15,7 @@
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <asm/synch.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
 
 #include "cxl.h"
 #include "trace.h"
@@ -41,7 +41,14 @@ static int afu_control(struct cxl_afu *afu, u64 command,
 			rc = -EBUSY;
 			goto out;
 		}
-		pr_devel_ratelimited("AFU control... (0x%.16llx)\n",
+
+		if (!cxl_adapter_link_ok(afu->adapter)) {
+			afu->enabled = enabled;
+			rc = -EIO;
+			goto out;
+		}
+
+		pr_devel_ratelimited("AFU control... (0x%016llx)\n",
 				     AFU_Cntl | command);
 		cpu_relax();
 		AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
@@ -73,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu)
 }
 
 /* This will disable as well as reset */
-int cxl_afu_reset(struct cxl_afu *afu)
+int __cxl_afu_reset(struct cxl_afu *afu)
 {
 	pr_devel("AFU reset request\n");
 
@@ -83,8 +90,12 @@ int cxl_afu_reset(struct cxl_afu *afu)
 			   false);
 }
 
-static int afu_check_and_enable(struct cxl_afu *afu)
+int cxl_afu_check_and_enable(struct cxl_afu *afu)
 {
+	if (!cxl_adapter_link_ok(afu->adapter)) {
+		WARN(1, "Refusing to enable afu while link down!\n");
+		return -EIO;
+	}
 	if (afu->enabled)
 		return 0;
 	return afu_enable(afu);
@@ -103,6 +114,12 @@ int cxl_psl_purge(struct cxl_afu *afu)
 
 	pr_devel("PSL purge request\n");
 
+	if (!cxl_adapter_link_ok(afu->adapter)) {
+		dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n");
+		rc = -EIO;
+		goto out;
+	}
+
 	if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
 		WARN(1, "psl_purge request while AFU not disabled!\n");
 		cxl_afu_disable(afu);
@@ -119,14 +136,19 @@ int cxl_psl_purge(struct cxl_afu *afu)
 			rc = -EBUSY;
 			goto out;
 		}
+		if (!cxl_adapter_link_ok(afu->adapter)) {
+			rc = -EIO;
+			goto out;
+		}
+
 		dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
-		pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx  PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr);
+		pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx  PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr);
 		if (dsisr & CXL_PSL_DSISR_TRANS) {
 			dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
-			dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%.16llx, DAR: 0x%.16llx\n", dsisr, dar);
+			dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", dsisr, dar);
 			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
 		} else if (dsisr) {
-			dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%.16llx\n", dsisr);
+			dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", dsisr);
 			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
 		} else {
 			cpu_relax();
@@ -161,10 +183,8 @@ static int spa_max_procs(int spa_size)
 	return ((spa_size / 8) - 96) / 17;
 }
 
-static int alloc_spa(struct cxl_afu *afu)
+int cxl_alloc_spa(struct cxl_afu *afu)
 {
-	u64 spap;
-
 	/* Work out how many pages to allocate */
 	afu->spa_order = 0;
 	do {
@@ -183,6 +203,13 @@ static int alloc_spa(struct cxl_afu *afu)
 	pr_devel("spa pages: %i afu->spa_max_procs: %i   afu->num_procs: %i\n",
 		 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs);
 
+	return 0;
+}
+
+static void attach_spa(struct cxl_afu *afu)
+{
+	u64 spap;
+
 	afu->sw_command_status = (__be64 *)((char *)afu->spa +
 					    ((afu->spa_max_procs + 3) * 128));
 
@@ -191,14 +218,19 @@ static int alloc_spa(struct cxl_afu *afu)
 	spap |= CXL_PSL_SPAP_V;
 	pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap);
 	cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap);
-
-	return 0;
 }
 
-static void release_spa(struct cxl_afu *afu)
+static inline void detach_spa(struct cxl_afu *afu)
 {
 	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0);
-	free_pages((unsigned long) afu->spa, afu->spa_order);
+}
+
+void cxl_release_spa(struct cxl_afu *afu)
+{
+	if (afu->spa) {
+		free_pages((unsigned long) afu->spa, afu->spa_order);
+		afu->spa = NULL;
+	}
 }
 
 int cxl_tlb_slb_invalidate(struct cxl *adapter)
@@ -215,6 +247,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
 			dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
 			return -EBUSY;
 		}
+		if (!cxl_adapter_link_ok(adapter))
+			return -EIO;
 		cpu_relax();
 	}
 
@@ -224,6 +258,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
 			dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
 			return -EBUSY;
 		}
+		if (!cxl_adapter_link_ok(adapter))
+			return -EIO;
 		cpu_relax();
 	}
 	return 0;
@@ -240,6 +276,11 @@ int cxl_afu_slbia(struct cxl_afu *afu)
 			dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
 			return -EBUSY;
 		}
+		/* If the adapter has gone down, we can assume that we
+		 * will PERST it and that will invalidate everything.
+		 */
+		if (!cxl_adapter_link_ok(afu->adapter))
+			return -EIO;
 		cpu_relax();
 	}
 	return 0;
@@ -279,6 +320,8 @@ static void slb_invalid(struct cxl_context *ctx)
 	cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);
 
 	while (1) {
+		if (!cxl_adapter_link_ok(adapter))
+			break;
 		slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
 		if (!(slbia & CXL_TLB_SLB_P))
 			break;
@@ -308,6 +351,11 @@ static int do_process_element_cmd(struct cxl_context *ctx,
 			rc = -EBUSY;
 			goto out;
 		}
+		if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n");
+			rc = -EIO;
+			goto out;
+		}
 		state = be64_to_cpup(ctx->afu->sw_command_status);
 		if (state == ~0ULL) {
 			pr_err("cxl: Error adding process element to AFU\n");
@@ -355,8 +403,13 @@ static int terminate_process_element(struct cxl_context *ctx)
 
 	mutex_lock(&ctx->afu->spa_mutex);
 	pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
-	rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
-				    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
+	/* We could be asked to terminate when the hw is down. That
+	 * should always succeed: it's not running if the hw has gone
+	 * away and is being reset.
+	 */
+	if (cxl_adapter_link_ok(ctx->afu->adapter))
+		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
+					    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
 	ctx->elem->software_state = 0;	/* Remove Valid bit */
 	pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
 	mutex_unlock(&ctx->afu->spa_mutex);
@@ -369,7 +422,14 @@ static int remove_process_element(struct cxl_context *ctx)
 
 	mutex_lock(&ctx->afu->spa_mutex);
 	pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
-	if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0)))
+
+	/* We could be asked to remove when the hw is down. Again, if
+	 * the hw is down, the PE is gone, so we succeed.
+	 */
+	if (cxl_adapter_link_ok(ctx->afu->adapter))
+		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0);
+
+	if (!rc)
 		ctx->pe_inserted = false;
 	slb_invalid(ctx);
 	pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
@@ -379,7 +439,7 @@ static int remove_process_element(struct cxl_context *ctx)
 }
 
 
-static void assign_psn_space(struct cxl_context *ctx)
+void cxl_assign_psn_space(struct cxl_context *ctx)
 {
 	if (!ctx->afu->pp_size || ctx->master) {
 		ctx->psn_phys = ctx->afu->psn_phys;
@@ -397,15 +457,18 @@ static int activate_afu_directed(struct cxl_afu *afu)
 
 	dev_info(&afu->dev, "Activating AFU directed mode\n");
 
-	if (alloc_spa(afu))
-		return -ENOMEM;
+	afu->num_procs = afu->max_procs_virtualised;
+	if (afu->spa == NULL) {
+		if (cxl_alloc_spa(afu))
+			return -ENOMEM;
+	}
+	attach_spa(afu);
 
 	cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU);
 	cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL);
 	cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L);
 
 	afu->current_mode = CXL_MODE_DIRECTED;
-	afu->num_procs = afu->max_procs_virtualised;
 
 	if ((rc = cxl_chardev_m_afu_add(afu)))
 		return rc;
@@ -430,34 +493,46 @@ err:
 #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE))
 #endif
 
+static u64 calculate_sr(struct cxl_context *ctx)
+{
+	u64 state = 0;
+	/* Build the PSL state register (SR) image for this context */
+	set_endian(state);
+	if (mfspr(SPRN_LPCR) & LPCR_TC)
+		state |= CXL_PSL_SR_An_TC;
+	if (ctx->master)
+		state |= CXL_PSL_SR_An_MP;
+	if (!ctx->kernel) {	/* non-kernel context: PR=1, R=1, HV=0 */
+		state |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
+		state &= ~(CXL_PSL_SR_An_HV);
+		if (!test_tsk_thread_flag(current, TIF_32BIT))
+			state |= CXL_PSL_SR_An_SF;
+	} else {		/* kernel context: HV=1, R=1, SF copied from the MSR */
+		state |= CXL_PSL_SR_An_HV | CXL_PSL_SR_An_R;
+		state |= mfmsr() & MSR_SF;
+	}
+	return state;
+}
+
 static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
 {
-	u64 sr;
+	u32 pid;
 	int r, result;
 
-	assign_psn_space(ctx);
+	cxl_assign_psn_space(ctx);
 
 	ctx->elem->ctxtime = 0; /* disable */
 	ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID));
 	ctx->elem->haurp = 0; /* disable */
 	ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));
 
-	sr = 0;
-	if (ctx->master)
-		sr |= CXL_PSL_SR_An_MP;
-	if (mfspr(SPRN_LPCR) & LPCR_TC)
-		sr |= CXL_PSL_SR_An_TC;
-	/* HV=0, PR=1, R=1 for userspace
-	 * For kernel contexts: this would need to change
-	 */
-	sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
-	set_endian(sr);
-	sr &= ~(CXL_PSL_SR_An_HV);
-	if (!test_tsk_thread_flag(current, TIF_32BIT))
-		sr |= CXL_PSL_SR_An_SF;
-	ctx->elem->common.pid = cpu_to_be32(current->pid);
+	pid = current->pid;
+	if (ctx->kernel)
+		pid = 0;
 	ctx->elem->common.tid = 0;
-	ctx->elem->sr = cpu_to_be64(sr);
+	ctx->elem->common.pid = cpu_to_be32(pid);
+
+	ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
 
 	ctx->elem->common.csrp = 0; /* disable */
 	ctx->elem->common.aurp0 = 0; /* disable */
@@ -477,12 +552,10 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
 	ctx->elem->common.wed = cpu_to_be64(wed);
 
 	/* first guy needs to enable */
-	if ((result = afu_check_and_enable(ctx->afu)))
+	if ((result = cxl_afu_check_and_enable(ctx->afu)))
 		return result;
 
-	add_process_element(ctx);
-
-	return 0;
+	return add_process_element(ctx);
 }
 
 static int deactivate_afu_directed(struct cxl_afu *afu)
@@ -495,12 +568,10 @@ static int deactivate_afu_directed(struct cxl_afu *afu)
 	cxl_sysfs_afu_m_remove(afu);
 	cxl_chardev_afu_remove(afu);
 
-	cxl_afu_reset(afu);
+	__cxl_afu_reset(afu);
 	cxl_afu_disable(afu);
 	cxl_psl_purge(afu);
 
-	release_spa(afu);
-
 	return 0;
 }
 
@@ -530,20 +601,15 @@ static int activate_dedicated_process(struct cxl_afu *afu)
 static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
 {
 	struct cxl_afu *afu = ctx->afu;
-	u64 sr;
+	u64 pid;
 	int rc;
 
-	sr = 0;
-	set_endian(sr);
-	if (ctx->master)
-		sr |= CXL_PSL_SR_An_MP;
-	if (mfspr(SPRN_LPCR) & LPCR_TC)
-		sr |= CXL_PSL_SR_An_TC;
-	sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
-	if (!test_tsk_thread_flag(current, TIF_32BIT))
-		sr |= CXL_PSL_SR_An_SF;
-	cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32);
-	cxl_p1n_write(afu, CXL_PSL_SR_An, sr);
+	pid = (u64)current->pid << 32;
+	if (ctx->kernel)
+		pid = 0;
+	cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid);
+
+	cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx));
 
 	if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1)))
 		return rc;
@@ -564,9 +630,9 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
 	cxl_p2n_write(afu, CXL_PSL_AMR_An, amr);
 
 	/* master only context for dedicated */
-	assign_psn_space(ctx);
+	cxl_assign_psn_space(ctx);
 
-	if ((rc = cxl_afu_reset(afu)))
+	if ((rc = __cxl_afu_reset(afu)))
 		return rc;
 
 	cxl_p2n_write(afu, CXL_PSL_WED_An, wed);
@@ -607,6 +673,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
 	if (!(mode & afu->modes_supported))
 		return -EINVAL;
 
+	if (!cxl_adapter_link_ok(afu->adapter)) {
+		WARN(1, "Device link is down, refusing to activate!\n");
+		return -EIO;
+	}
+
 	if (mode == CXL_MODE_DIRECTED)
 		return activate_afu_directed(afu);
 	if (mode == CXL_MODE_DEDICATED)
@@ -617,6 +688,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
 
 int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
 {
+	if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
+		WARN(1, "Device link is down, refusing to attach process!\n");
+		return -EIO;
+	}
+
 	ctx->kernel = kernel;
 	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
 		return attach_afu_directed(ctx, wed, amr);
@@ -629,7 +705,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
 
 static inline int detach_process_native_dedicated(struct cxl_context *ctx)
 {
-	cxl_afu_reset(ctx->afu);
+	__cxl_afu_reset(ctx->afu);
 	cxl_afu_disable(ctx->afu);
 	cxl_psl_purge(ctx->afu);
 	return 0;
@@ -661,6 +737,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
 {
 	u64 pidtid;
 
+	/* If the adapter has gone away, we can't get any meaningful
+	 * information.
+	 */
+	if (!cxl_adapter_link_ok(afu->adapter))
+		return -EIO;
+
 	info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
 	info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
 	info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
@@ -677,7 +759,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat)
 {
 	u64 dsisr;
 
-	pr_devel("RECOVERING FROM PSL ERROR... (0x%.16llx)\n", errstat);
+	pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat);
 
 	/* Clear PSL_DSISR[PE] */
 	dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
diff --git a/kernel/drivers/misc/cxl/pci.c b/kernel/drivers/misc/cxl/pci.c
index 4f1b0bdb9..be2c8e248 100644
--- a/kernel/drivers/misc/cxl/pci.c
+++ b/kernel/drivers/misc/cxl/pci.c
@@ -24,6 +24,7 @@
 #include <asm/io.h>
 
 #include "cxl.h"
+#include <misc/cxl.h>
 
 
 #define CXL_PCI_VSEC_ID	0x1280
@@ -90,6 +91,7 @@
 /* This works a little different than the p1/p2 register accesses to make it
  * easier to pull out individual fields */
 #define AFUD_READ(afu, off)		in_be64(afu->afu_desc_mmio + off)
+#define AFUD_READ_LE(afu, off)		in_le64(afu->afu_desc_mmio + off)
 #define EXTRACT_PPC_BIT(val, bit)	(!!(val & PPC_BIT(bit)))
 #define EXTRACT_PPC_BITS(val, bs, be)	((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
 
@@ -132,7 +134,7 @@ u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off)
 	return (val >> ((off & 0x3) * 8)) & 0xff;
 }
 
-static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = {
+static const struct pci_device_id cxl_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
@@ -204,7 +206,7 @@ static void dump_cxl_config_space(struct pci_dev *dev)
 	dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
 		p1_base(dev), p1_size(dev));
 	dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
-		p1_base(dev), p2_size(dev));
+		p2_base(dev), p2_size(dev));
 	dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
 		pci_resource_start(dev, 4), pci_resource_len(dev, 4));
 
@@ -286,7 +288,8 @@ static void dump_cxl_config_space(struct pci_dev *dev)
 
 static void dump_afu_descriptor(struct cxl_afu *afu)
 {
-	u64 val;
+	u64 val, afu_cr_num, afu_cr_off, afu_cr_len;
+	int i;
 
 #define show_reg(name, what) \
 	dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)
@@ -296,6 +299,7 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 	show_reg("num_of_processes", AFUD_NUM_PROCS(val));
 	show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
 	show_reg("req_prog_mode", val & 0xffffULL);
+	afu_cr_num = AFUD_NUM_CRS(val);
 
 	val = AFUD_READ(afu, 0x8);
 	show_reg("Reserved", val);
@@ -307,8 +311,10 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 	val = AFUD_READ_CR(afu);
 	show_reg("Reserved", (val >> (63-7)) & 0xff);
 	show_reg("AFU_CR_len", AFUD_CR_LEN(val));
+	afu_cr_len = AFUD_CR_LEN(val) * 256;
 
 	val = AFUD_READ_CR_OFF(afu);
+	afu_cr_off = val;
 	show_reg("AFU_CR_offset", val);
 
 	val = AFUD_READ_PPPSA(afu);
@@ -325,6 +331,11 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
 	val = AFUD_READ_EB_OFF(afu);
 	show_reg("AFU_EB_offset", val);
 
+	for (i = 0; i < afu_cr_num; i++) {
+		val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len);
+		show_reg("CR Vendor", val & 0xffff);
+		show_reg("CR Device", (val >> 16) & 0xffff);
+	}
 #undef show_reg
 }
 
@@ -359,6 +370,55 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev
 	return 0;
 }
 
+#define TBSYNC_CNT(n) (((u64)(n) & 0x7) << (63-6))
+#define _2048_250MHZ_CYCLES 1
+/* Enable the PSL timebase and wait for it to sync; returns 0, -ENODEV or -EIO */
+static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
+{
+	u64 psl_tb;
+	s64 delta;	/* 64-bit: a truncated difference could look "in sync" */
+	unsigned int retry = 0;
+	struct device_node *np;
+
+	if (!(np = pnv_pci_get_phb_node(dev)))
+		return -ENODEV;
+
+	/* Do not fail when CAPP timebase sync is not supported by OPAL */
+	of_node_get(np);
+	if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) {
+		of_node_put(np);
+		pr_err("PSL: Timebase sync: OPAL support missing\n");
+		return 0;
+	}
+	of_node_put(np);
+
+	/*
+	 * Setup PSL Timebase Control and Status register
+	 * with the recommended Timebase Sync Count value
+	 */
+	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
+		     TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));
+
+	/* Enable PSL Timebase */
+	cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
+	cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
+
+	/* Wait until CORE TB and PSL TB difference <= 16usecs */
+	do {
+		msleep(1);
+		if (retry++ > 5) {
+			pr_err("PSL: Timebase sync: giving up!\n");
+			return -EIO;
+		}
+		psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
+		delta = mftb() - psl_tb;	/* signed: either side may be ahead */
+		if (delta < 0)
+			delta = -delta;
+	} while (tb_to_ns(delta) > 16000);
+
+	return 0;
+}
+
 static int init_implementation_afu_regs(struct cxl_afu *afu)
 {
 	/* read/write masks for this slice */
@@ -529,10 +589,18 @@ err:
 
 static void cxl_unmap_slice_regs(struct cxl_afu *afu)
 {
-	if (afu->p1n_mmio)
+	if (afu->p2n_mmio) {	/* test the pointer that is actually unmapped */
 		iounmap(afu->p2n_mmio);
-	if (afu->p1n_mmio)
+		afu->p2n_mmio = NULL;	/* a repeated call then skips this mapping */
+	}
+	if (afu->p1n_mmio) {
 		iounmap(afu->p1n_mmio);
+		afu->p1n_mmio = NULL;
+	}
+	if (afu->afu_desc_mmio) {	/* the AFU descriptor mapping is dropped here too */
+		iounmap(afu->afu_desc_mmio);
+		afu->afu_desc_mmio = NULL;
+	}
 }
 
 static void cxl_release_afu(struct device *dev)
@@ -541,6 +609,9 @@ static void cxl_release_afu(struct device *dev)
 
 	pr_devel("cxl_release_afu\n");
 
+	idr_destroy(&afu->contexts_idr);
+	cxl_release_spa(afu);
+
 	kfree(afu);
 }
 
@@ -593,6 +664,22 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu)
 	afu->crs_len = AFUD_CR_LEN(val) * 256;
 	afu->crs_offset = AFUD_READ_CR_OFF(afu);
 
+
+	/* eb_len is in multiple of 4K */
+	afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096;
+	afu->eb_offset = AFUD_READ_EB_OFF(afu);
+
+	/* eb_off is 4K aligned so lower 12 bits are always zero */
+	if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) {
+		dev_warn(&afu->dev,
+			 "Invalid AFU error buffer offset %Lx\n",
+			 afu->eb_offset);
+		dev_info(&afu->dev,
+			 "Ignoring AFU error buffer in the descriptor\n");
+		/* indicate that no afu buffer exists */
+		afu->eb_len = 0;
+	}
+
 	return 0;
 }
 
@@ -630,8 +717,8 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
 	 */
 	reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
 	if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
-		dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg);
-		if (cxl_afu_reset(afu))
+		dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
+		if (__cxl_afu_reset(afu))
 			return -EIO;
 		if (cxl_afu_disable(afu))
 			return -EIO;
@@ -651,7 +738,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
 	cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000);
 	reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
 	if (reg) {
-		dev_warn(&afu->dev, "AFU had pending DSISR: %#.16llx\n", reg);
+		dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg);
 		if (reg & CXL_PSL_DSISR_TRANS)
 			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
 		else
@@ -660,57 +747,126 @@ static int sanitise_afu_regs(struct cxl_afu *afu)
 	reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
 	if (reg) {
 		if (reg & ~0xffff)
-			dev_warn(&afu->dev, "AFU had pending SERR: %#.16llx\n", reg);
+			dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
 		cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
 	}
 	reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
 	if (reg) {
-		dev_warn(&afu->dev, "AFU had pending error status: %#.16llx\n", reg);
+		dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg);
 		cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg);
 	}
 
 	return 0;
 }
 
-static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
+/*
+ * cxl_afu_read_err_buffer:
+ * Called from sysfs and reads the afu error info buffer. The h/w only supports
+ * 4/8 byte aligned access, so when the requested offset/count aren't 8 byte
+ * aligned a bounce buffer (max PAGE_SIZE) is used. Returns bytes copied or -ENOMEM.
+ */
+ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
+				loff_t off, size_t count)
 {
-	struct cxl_afu *afu;
-	bool free = true;
-	int rc;
+	loff_t aligned_start, aligned_end;
+	size_t aligned_length;
+	void *tbuf;
+	const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset;
+
+	if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
+		return 0;	/* nothing requested, or offset outside the buffer */
+
+	/* clamp to the buffer end, then widen to an 8 byte aligned window */
+	count = min((size_t)(afu->eb_len - off), count);
+	aligned_start = round_down(off, 8);
+	aligned_end = round_up(off + count, 8);
+	aligned_length = aligned_end - aligned_start;
+
+	/* max we can copy in one read is PAGE_SIZE */
+	if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) {
+		aligned_length = ERR_BUFF_MAX_COPY_SIZE;
+		count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);	/* short read */
+	}
 
-	if (!(afu = cxl_alloc_afu(adapter, slice)))
+	/* use bounce buffer for copy */
+	tbuf = (void *)__get_free_page(GFP_TEMPORARY);
+	if (!tbuf)
 		return -ENOMEM;
 
-	if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice)))
-		goto err1;
+	/* perform aligned read from the mmio region */
+	memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length);
+	memcpy(buf, tbuf + (off & 0x7), count);	/* skip the alignment padding */
+
+	free_page((unsigned long)tbuf);
+
+	return count;
+}
+/* Map, sanitise and reset one AFU slice, read its descriptor, register its irqs */
+static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
+{
+	int rc;	/* error code of the first failing step, returned to the caller */
 
 	if ((rc = cxl_map_slice_regs(afu, adapter, dev)))
-		goto err1;
+		return rc;	/* nothing acquired here yet, so no unwind */
 
 	if ((rc = sanitise_afu_regs(afu)))
-		goto err2;
+		goto err1;
 
 	/* We need to reset the AFU before we can read the AFU descriptor */
-	if ((rc = cxl_afu_reset(afu)))
-		goto err2;
+	if ((rc = __cxl_afu_reset(afu)))
+		goto err1;
 
 	if (cxl_verbose)
 		dump_afu_descriptor(afu);
 
 	if ((rc = cxl_read_afu_descriptor(afu)))
-		goto err2;
+		goto err1;
 
 	if ((rc = cxl_afu_descriptor_looks_ok(afu)))
-		goto err2;
+		goto err1;
 
 	if ((rc = init_implementation_afu_regs(afu)))
-		goto err2;
+		goto err1;
 
 	if ((rc = cxl_register_serr_irq(afu)))
-		goto err2;
+		goto err1;
 
 	if ((rc = cxl_register_psl_irq(afu)))
-		goto err3;
+		goto err2;	/* the SERR irq is registered by now: release it */
+
+	return 0;
+	/* Unwind in reverse order of acquisition */
+err2:
+	cxl_release_serr_irq(afu);
+err1:
+	cxl_unmap_slice_regs(afu);
+	return rc;
+}
+/* Release what cxl_configure_afu() acquired: both irqs, then the slice mappings */
+static void cxl_deconfigure_afu(struct cxl_afu *afu)
+{
+	cxl_release_psl_irq(afu);	/* registered last by cxl_configure_afu() */
+	cxl_release_serr_irq(afu);
+	cxl_unmap_slice_regs(afu);	/* mapped first by cxl_configure_afu() */
+}
+
+static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
+{
+	struct cxl_afu *afu;
+	int rc;
+
+	afu = cxl_alloc_afu(adapter, slice);
+	if (!afu)
+		return -ENOMEM;
+
+	rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice);
+	if (rc)
+		goto err_free;
+
+	rc = cxl_configure_afu(afu, adapter, dev);
+	if (rc)
+		goto err_free;
 
 	/* Don't care if this fails */
 	cxl_debugfs_afu_add(afu);
@@ -725,29 +881,23 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 	if ((rc = cxl_sysfs_afu_add(afu)))
 		goto err_put1;
 
-
-	if ((rc = cxl_afu_select_best_mode(afu)))
-		goto err_put2;
-
 	adapter->afu[afu->slice] = afu;
 
+	if ((rc = cxl_pci_vphb_add(afu)))
+		dev_info(&afu->dev, "Can't register vPHB\n");
+
 	return 0;
 
-err_put2:
-	cxl_sysfs_afu_remove(afu);
 err_put1:
-	device_unregister(&afu->dev);
-	free = false;
+	cxl_deconfigure_afu(afu);
 	cxl_debugfs_afu_remove(afu);
-	cxl_release_psl_irq(afu);
-err3:
-	cxl_release_serr_irq(afu);
-err2:
-	cxl_unmap_slice_regs(afu);
-err1:
-	if (free)
-		kfree(afu);
+	device_unregister(&afu->dev);
 	return rc;
+
+err_free:
+	kfree(afu);
+	return rc;
+
 }
 
 static void cxl_remove_afu(struct cxl_afu *afu)
@@ -767,10 +917,7 @@ static void cxl_remove_afu(struct cxl_afu *afu)
 	cxl_context_detach_all(afu);
 	cxl_afu_deactivate_mode(afu);
 
-	cxl_release_psl_irq(afu);
-	cxl_release_serr_irq(afu);
-	cxl_unmap_slice_regs(afu);
-
+	cxl_deconfigure_afu(afu);
 	device_unregister(&afu->dev);
 }
 
@@ -779,6 +926,12 @@ int cxl_reset(struct cxl *adapter)
 	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
 	int rc;
 
+	if (adapter->perst_same_image) {
+		dev_warn(&dev->dev,
+			 "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n");
+		return -EINVAL;
+	}
+
 	dev_info(&dev->dev, "CXL reset\n");
 
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
@@ -799,7 +952,7 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
 	if (pci_request_region(dev, 0, "priv 1 regs"))
 		goto err2;
 
-	pr_devel("cxl_map_adapter_regs: p1: %#.16llx %#llx, p2: %#.16llx %#llx",
+	pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx",
 			p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev));
 
 	if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
@@ -823,10 +976,16 @@ err1:
 
 static void cxl_unmap_adapter_regs(struct cxl *adapter)
 {
-	if (adapter->p1_mmio)
+	if (adapter->p1_mmio) {	/* unmap and release the region only if mapped */
 		iounmap(adapter->p1_mmio);
-	if (adapter->p2_mmio)
+		adapter->p1_mmio = NULL;	/* a repeated call skips this mapping */
+		pci_release_region(to_pci_dev(adapter->dev.parent), 2);
+	}
+	if (adapter->p2_mmio) {
 		iounmap(adapter->p2_mmio);
+		adapter->p2_mmio = NULL;
+		pci_release_region(to_pci_dev(adapter->dev.parent), 0);
+	}
 }
 
 static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
@@ -838,13 +997,13 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
 	u8 image_state;
 
 	if (!(vsec = find_cxl_vsec(dev))) {
-		dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n");
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
 		return -ENODEV;
 	}
 
 	CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen);
 	if (vseclen < CXL_VSEC_MIN_SIZE) {
-		pr_err("ABORTING: CXL VSEC too short\n");
+		dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n");
 		return -EINVAL;
 	}
 
@@ -855,7 +1014,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
 	CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image);
 	CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state);
 	adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
-	adapter->perst_loads_image = true;
 	adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
 
 	CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices);
@@ -877,30 +1035,56 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
 	return 0;
 }
 
+/*
+ * Some cards sit behind a PCIe Host Bridge defect that can make it
+ * report malformed Transaction Layer Packet (TLP) errors erroneously.
+ * Work around it by masking that error (together with the internal
+ * error bit) in the AER Uncorrectable Error Mask Register.
+ *
+ * Affected cards are told apart by the upper nibble of the PSL
+ * revision, which is 0 on them; any other value is left untouched.
+ */
+static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev)
+{
+	u32 mask;
+	int aer_pos;
+
+	if (adapter->psl_rev & 0xf000)
+		return;
+	aer_pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!aer_pos)
+		return;
+	pci_read_config_dword(dev, aer_pos + PCI_ERR_UNCOR_MASK, &mask);
+	/* Both bits already masked: skip the config write */
+	if ((mask & PCI_ERR_UNC_MALF_TLP) && (mask & PCI_ERR_UNC_INTN))
+		return;
+	mask |= PCI_ERR_UNC_MALF_TLP | PCI_ERR_UNC_INTN;
+	pci_write_config_dword(dev, aer_pos + PCI_ERR_UNCOR_MASK, mask);
+}
+
 static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
 {
 	if (adapter->vsec_status & CXL_STATUS_SECOND_PORT)
 		return -EBUSY;
 
 	if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) {
-		dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n");
+		dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n");
 		return -EINVAL;
 	}
 
 	if (!adapter->slices) {
 		/* Once we support dynamic reprogramming we can use the card if
 		 * it supports loadable AFUs */
-		dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n");
+		dev_err(&dev->dev, "ABORTING: Device has no AFUs\n");
 		return -EINVAL;
 	}
 
 	if (!adapter->afu_desc_off || !adapter->afu_desc_size) {
-		dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n");
+		dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
 		return -EINVAL;
 	}
 
 	if (adapter->ps_size > p2_size(dev) - adapter->ps_off) {
-		dev_err(&adapter->dev, "ABORTING: Problem state size larger than "
+		dev_err(&dev->dev, "ABORTING: Problem state size larger than "
 				   "available in BAR2: 0x%llx > 0x%llx\n",
 			 adapter->ps_size, p2_size(dev) - adapter->ps_off);
 		return -EINVAL;
@@ -915,78 +1099,140 @@ static void cxl_release_adapter(struct device *dev)
 
 	pr_devel("cxl_release_adapter\n");
 
+	cxl_remove_adapter_nr(adapter);
+
 	kfree(adapter);
 }
 
-static struct cxl *cxl_alloc_adapter(struct pci_dev *dev)
+static struct cxl *cxl_alloc_adapter(void)
 {
 	struct cxl *adapter;
 
 	if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL)))
 		return NULL;
 
-	adapter->dev.parent = &dev->dev;
-	adapter->dev.release = cxl_release_adapter;
-	pci_set_drvdata(dev, adapter);
 	spin_lock_init(&adapter->afu_list_lock);
 
+	if (cxl_alloc_adapter_nr(adapter))	/* presumably sets adapter_num - TODO confirm */
+		goto err1;
+
+	if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
+		goto err2;
+
 	return adapter;
+	/* Failure: undo in reverse order; the caller only ever sees NULL */
+err2:
+	cxl_remove_adapter_nr(adapter);
+err1:
+	kfree(adapter);
+	return NULL;
 }
 
+#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31))
+/* Clear the PSL tberror bit, then return the result of the TLB/SLB invalidate */
 static int sanitise_adapter_regs(struct cxl *adapter)
 {
-	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000);
+	/* Clear PSL tberror bit by writing 1 to it */
+	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
 	return cxl_tlb_slb_invalidate(adapter);
 }
 
-static struct cxl *cxl_init_adapter(struct pci_dev *dev)
+/* This should contain *only* operations that can safely be done in
+ * both creation and recovery. Returns 0 or a negative error code; on failure
+ * pci_disable_device() is left to the caller (cxl_init_adapter() does it). */
+static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
 {
-	struct cxl *adapter;
-	bool free = true;
 	int rc;
 
+	adapter->dev.parent = &dev->dev;
+	adapter->dev.release = cxl_release_adapter;
+	pci_set_drvdata(dev, adapter);	/* read back via pci_get_drvdata() in cxl_remove() */
 
-	if (!(adapter = cxl_alloc_adapter(dev)))
-		return ERR_PTR(-ENOMEM);
+	rc = pci_enable_device(dev);
+	if (rc) {
+		dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
+		return rc;
+	}
 
-	if ((rc = switch_card_to_cxl(dev)))
-		goto err1;
+	if ((rc = cxl_read_vsec(adapter, dev)))
+		return rc;
 
-	if ((rc = cxl_alloc_adapter_nr(adapter)))
-		goto err1;
+	if ((rc = cxl_vsec_looks_ok(adapter, dev)))
+	        return rc;
 
-	if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)))
-		goto err2;
+	cxl_fixup_malformed_tlp(adapter, dev);	/* tests adapter->psl_rev first */
 
-	if ((rc = cxl_read_vsec(adapter, dev)))
-		goto err2;
+	if ((rc = setup_cxl_bars(dev)))
+		return rc;
 
-	if ((rc = cxl_vsec_looks_ok(adapter, dev)))
-		goto err2;
+	if ((rc = switch_card_to_cxl(dev)))
+		return rc;
 
 	if ((rc = cxl_update_image_control(adapter)))
-		goto err2;
+		return rc;
 
 	if ((rc = cxl_map_adapter_regs(adapter, dev)))
-		goto err2;
+		return rc;	/* last plain return: steps below unwind via err */
 
 	if ((rc = sanitise_adapter_regs(adapter)))
-		goto err2;
+		goto err;
 
 	if ((rc = init_implementation_adapter_regs(adapter, dev)))
-		goto err3;
+		goto err;
 
 	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI)))
-		goto err3;
+		goto err;
 
 	/* If recovery happened, the last step is to turn on snooping.
 	 * In the non-recovery case this has no effect */
-	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) {
-		goto err3;
-	}
+	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
+		goto err;
+
+	if ((rc = cxl_setup_psl_timebase(adapter, dev)))
+		goto err;
 
 	if ((rc = cxl_register_psl_err_irq(adapter)))
-		goto err3;
+		goto err;
+
+	return 0;
+
+err:
+	cxl_unmap_adapter_regs(adapter);	/* only the register mappings are undone here */
+	return rc;
+
+}
+
+static void cxl_deconfigure_adapter(struct cxl *adapter)
+{
+	/* Tear down in the reverse order of cxl_configure_adapter() */
+	cxl_release_psl_err_irq(adapter);
+	cxl_unmap_adapter_regs(adapter);
+
+	/* Balances the pci_enable_device() done at configure time */
+	pci_disable_device(to_pci_dev(adapter->dev.parent));
+}
+
+static struct cxl *cxl_init_adapter(struct pci_dev *dev)
+{
+	struct cxl *adapter;
+	int rc;
+
+	adapter = cxl_alloc_adapter();
+	if (!adapter)
+		return ERR_PTR(-ENOMEM);
+
+	/* Set defaults for parameters which need to persist over
+	 * configure/reconfigure
+	 */
+	adapter->perst_loads_image = true;
+	adapter->perst_same_image = false;
+
+	rc = cxl_configure_adapter(adapter, dev);
+	if (rc) {
+		pci_disable_device(dev);
+		cxl_release_adapter(&adapter->dev);
+		return ERR_PTR(rc);
+	}
 
 	/* Don't care if this one fails: */
 	cxl_debugfs_adapter_add(adapter);
@@ -1004,37 +1250,25 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev)
 	return adapter;
 
 err_put1:
-	device_unregister(&adapter->dev);
-	free = false;
+	/* This should mirror cxl_remove_adapter, except without the
+	 * sysfs parts
+	 */
 	cxl_debugfs_adapter_remove(adapter);
-	cxl_release_psl_err_irq(adapter);
-err3:
-	cxl_unmap_adapter_regs(adapter);
-err2:
-	cxl_remove_adapter_nr(adapter);
-err1:
-	if (free)
-		kfree(adapter);
+	cxl_deconfigure_adapter(adapter);
+	device_unregister(&adapter->dev);
 	return ERR_PTR(rc);
 }
 
 static void cxl_remove_adapter(struct cxl *adapter)
 {
-	struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);
-
-	pr_devel("cxl_release_adapter\n");
+	pr_devel("cxl_remove_adapter\n");
 
 	cxl_sysfs_adapter_remove(adapter);
 	cxl_debugfs_adapter_remove(adapter);
-	cxl_release_psl_err_irq(adapter);
-	cxl_unmap_adapter_regs(adapter);
-	cxl_remove_adapter_nr(adapter);
 
-	device_unregister(&adapter->dev);
+	cxl_deconfigure_adapter(adapter);	/* irq, register mappings, pci_disable_device() */
 
-	pci_release_region(pdev, 0);
-	pci_release_region(pdev, 2);
-	pci_disable_device(pdev);
+	device_unregister(&adapter->dev);	/* dev.release is cxl_release_adapter() */
 }
 
 static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -1046,14 +1280,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	if (cxl_verbose)
 		dump_cxl_config_space(dev);
 
-	if ((rc = setup_cxl_bars(dev)))
-		return rc;
-
-	if ((rc = pci_enable_device(dev))) {
-		dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
-		return rc;
-	}
-
 	adapter = cxl_init_adapter(dev);
 	if (IS_ERR(adapter)) {
 		dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
@@ -1061,8 +1287,14 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	}
 
 	for (slice = 0; slice < adapter->slices; slice++) {
-		if ((rc = cxl_init_afu(adapter, slice, dev)))
+		if ((rc = cxl_init_afu(adapter, slice, dev))) {
 			dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc);
+			continue;
+		}
+
+		rc = cxl_afu_select_best_mode(adapter->afu[slice]);
+		if (rc)
+			dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc);
 	}
 
 	return 0;
@@ -1071,22 +1303,277 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 static void cxl_remove(struct pci_dev *dev)
 {
 	struct cxl *adapter = pci_get_drvdata(dev);
-	int afu;
-
-	dev_warn(&dev->dev, "pci remove\n");
+	struct cxl_afu *afu;
+	int i;
 
 	/*
 	 * Lock to prevent someone grabbing a ref through the adapter list as
 	 * we are removing it
 	 */
-	for (afu = 0; afu < adapter->slices; afu++)
-		cxl_remove_afu(adapter->afu[afu]);
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+		cxl_pci_vphb_remove(afu);
+		cxl_remove_afu(afu);
+	}
 	cxl_remove_adapter(adapter);
 }
 
+static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
+						pci_channel_state_t state)
+{
+	struct pci_dev *afu_dev;
+	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+	pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
+
+	/* There should only be one entry, but go through the list
+	 * anyway
+	 */
+	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+		if (!afu_dev->driver)
+			continue;
+
+		afu_dev->error_state = state;
+
+		if (afu_dev->driver->err_handler)
+			afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
+										  state);
+		/* Disconnect trumps all, NONE trumps NEED_RESET */
+		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+			result = PCI_ERS_RESULT_DISCONNECT;
+		else if ((afu_result == PCI_ERS_RESULT_NONE) &&
+			 (result == PCI_ERS_RESULT_NEED_RESET))
+			result = PCI_ERS_RESULT_NONE;
+	}
+	return result;
+}
+
+static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
+					       pci_channel_state_t state)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+	int i;
+
+	/* At this point, we could still have an interrupt pending.
+	 * Let's try to get them out of the way before they do
+	 * anything we don't like.
+	 */
+	schedule();
+
+	/* If we're permanently dead, give up. */
+	if (state == pci_channel_io_perm_failure) {
+		/* Tell the AFU drivers; but we don't care what they
+		 * say, we're going away.
+		 */
+		for (i = 0; i < adapter->slices; i++) {
+			afu = adapter->afu[i];
+			cxl_vphb_error_detected(afu, state);
+		}
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Are we reflashing?
+	 *
+	 * If we reflash, we could come back as something entirely
+	 * different, including a non-CAPI card. As such, by default
+	 * we don't participate in the process. We'll be unbound and
+	 * the slot re-probed. (TODO: check EEH doesn't blindly rebind
+	 * us!)
+	 *
+	 * However, this isn't the entire story: for reliability
+	 * reasons, we usually want to reflash the FPGA on PERST in
+	 * order to get back to a more reliable known-good state.
+	 *
+	 * This causes us a bit of a problem: if we reflash we can't
+	 * trust that we'll come back the same - we could have a new
+	 * image and been PERSTed in order to load that
+	 * image. However, most of the time we actually *will* come
+	 * back the same - for example a regular EEH event.
+	 *
+	 * Therefore, we allow the user to assert that the image is
+	 * indeed the same and that we should continue on into EEH
+	 * anyway.
+	 */
+	if (adapter->perst_loads_image && !adapter->perst_same_image) {
+		/* TODO take the PHB out of CXL mode */
+		dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	/*
+	 * At this point, we want to try to recover.  We'll always
+	 * need a complete slot reset: we don't trust any other reset.
+	 *
+	 * Now, we go through each AFU:
+	 *  - We send the driver, if bound, an error_detected callback.
+	 *    We expect it to clean up, but it can also tell us to give
+	 *    up and permanently detach the card. To simplify things, if
+	 *    any bound AFU driver doesn't support EEH, we give up on EEH.
+	 *
+	 *  - We detach all contexts associated with the AFU. This
+	 *    does not free them, but puts them into a CLOSED state
+	 *    which causes any associated files to return useful
+	 *    errors to userland. It also unmaps, but does not free,
+	 *    any IRQs.
+	 *
+	 *  - We clean up our side: releasing and unmapping resources we hold
+	 *    so we can wire them up again when the hardware comes back up.
+	 *
+	 * Driver authors should note:
+	 *
+	 *  - Any contexts you create in your kernel driver (except
+	 *    those associated with anonymous file descriptors) are
+	 *    your responsibility to free and recreate. Likewise with
+	 *    any attached resources.
+	 *
+	 *  - We will take responsibility for re-initialising the
+	 *    device context (the one set up for you in
+	 *    cxl_pci_enable_device_hook and accessed through
+	 *    cxl_get_context). If you've attached IRQs or other
+	 *    resources to it, they remain yours to free.
+	 *
+	 * You can call the same functions to release resources as you
+	 * normally would: we make sure that these functions continue
+	 * to work when the hardware is down.
+	 *
+	 * Two examples:
+	 *
+	 * 1) If you normally free all your resources at the end of
+	 *    each request, or if you use anonymous FDs, your
+	 *    error_detected callback can simply set a flag to tell
+	 *    your driver not to start any new calls. You can then
+	 *    clear the flag in the resume callback.
+	 *
+	 * 2) If you normally allocate your resources on startup:
+	 *     * Set a flag in error_detected as above.
+	 *     * Let CXL detach your contexts.
+	 *     * In slot_reset, free the old resources and allocate new ones.
+	 *     * In resume, clear the flag to allow things to start.
+	 */
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		result = cxl_vphb_error_detected(afu, state);
+
+		/* Only continue if everyone agrees on NEED_RESET */
+		if (result != PCI_ERS_RESULT_NEED_RESET)
+			return result;
+
+		cxl_context_detach_all(afu);
+		cxl_afu_deactivate_mode(afu);
+		cxl_deconfigure_afu(afu);
+	}
+	cxl_deconfigure_adapter(adapter);
+
+	return result;
+}
+
+static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+	struct pci_dev *afu_dev;
+	pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED;
+	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+	int i;
+
+	if (cxl_configure_adapter(adapter, pdev))
+		goto err;
+
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		if (cxl_configure_afu(afu, adapter, pdev))
+			goto err;
+
+		if (cxl_afu_select_best_mode(afu))
+			goto err;
+
+		cxl_pci_vphb_reconfigure(afu);
+
+		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			/* Reset the device context.
+			 * TODO: make this less disruptive
+			 */
+			ctx = cxl_get_context(afu_dev);
+
+			if (ctx && cxl_release_context(ctx))
+				goto err;
+
+			ctx = cxl_dev_context_init(afu_dev);
+			if (!ctx)
+				goto err;
+
+			afu_dev->dev.archdata.cxl_ctx = ctx;
+
+			if (cxl_afu_check_and_enable(afu))
+				goto err;
+
+			afu_dev->error_state = pci_channel_io_normal;
+
+			/* If there's a driver attached, allow it to
+			 * chime in on recovery. Drivers should check
+			 * if everything has come back OK, but
+			 * shouldn't start new work until we call
+			 * their resume function.
+			 */
+			if (!afu_dev->driver)
+				continue;
+
+			if (afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->slot_reset)
+				afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);
+
+			if (afu_result == PCI_ERS_RESULT_DISCONNECT)
+				result = PCI_ERS_RESULT_DISCONNECT;
+		}
+	}
+	return result;
+
+err:
+	/* All the bits that happen in both error_detected and cxl_remove
+	 * should be idempotent, so we don't need to worry about leaving a mix
+	 * of unconfigured and reconfigured resources.
+	 */
+	dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n");
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+static void cxl_pci_resume(struct pci_dev *pdev)
+{
+	struct cxl *adapter = pci_get_drvdata(pdev);
+	struct cxl_afu *afu;
+	struct pci_dev *afu_dev;
+	int i;
+
+	/* Everything is back now. Drivers should restart work now.
+	 * This is not the place to be checking if everything came back up
+	 * properly, because there's no return value: do that in slot_reset.
+	 */
+	for (i = 0; i < adapter->slices; i++) {
+		afu = adapter->afu[i];
+
+		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
+			if (afu_dev->driver && afu_dev->driver->err_handler &&
+			    afu_dev->driver->err_handler->resume)
+				afu_dev->driver->err_handler->resume(afu_dev);
+		}
+	}
+}
+
+static const struct pci_error_handlers cxl_err_handler = {
+	.error_detected = cxl_pci_error_detected,
+	.slot_reset = cxl_pci_slot_reset,
+	.resume = cxl_pci_resume,
+};
+
 struct pci_driver cxl_pci_driver = {
 	.name = "cxl-pci",
 	.id_table = cxl_pci_tbl,
 	.probe = cxl_probe,
 	.remove = cxl_remove,
+	.shutdown = cxl_remove,
+	.err_handler = &cxl_err_handler,
 };
diff --git a/kernel/drivers/misc/cxl/sysfs.c b/kernel/drivers/misc/cxl/sysfs.c
index d0c38c7bc..02006f710 100644
--- a/kernel/drivers/misc/cxl/sysfs.c
+++ b/kernel/drivers/misc/cxl/sysfs.c
@@ -112,12 +112,38 @@ static ssize_t load_image_on_perst_store(struct device *device,
 	return count;
 }
 
+static ssize_t perst_reloads_same_image_show(struct device *device,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image);
+}
+
+static ssize_t perst_reloads_same_image_store(struct device *device,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct cxl *adapter = to_cxl_adapter(device);
+	int rc;
+	int val;
+
+	rc = sscanf(buf, "%i", &val);
+	if ((rc != 1) || !(val == 1 || val == 0))
+		return -EINVAL;
+
+	adapter->perst_same_image = (val == 1 ? true : false);
+	return count;
+}
+
 static struct device_attribute adapter_attrs[] = {
 	__ATTR_RO(caia_version),
 	__ATTR_RO(psl_revision),
 	__ATTR_RO(base_image),
 	__ATTR_RO(image_loaded),
 	__ATTR_RW(load_image_on_perst),
+	__ATTR_RW(perst_reloads_same_image),
 	__ATTR(reset, S_IWUSR, NULL, reset_adapter_store),
 };
 
@@ -185,7 +211,7 @@ static ssize_t reset_store_afu(struct device *device,
 		goto err;
 	}
 
-	if ((rc = cxl_afu_reset(afu)))
+	if ((rc = __cxl_afu_reset(afu)))
 		goto err;
 
 	rc = count;
@@ -356,6 +382,16 @@ static ssize_t api_version_compatible_show(struct device *device,
 	return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE);
 }
 
+static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr, char *buf,
+			       loff_t off, size_t count)
+{
+	struct cxl_afu *afu = to_cxl_afu(container_of(kobj,
+						      struct device, kobj));
+
+	return cxl_afu_read_err_buffer(afu, buf, off, count);
+}
+
 static struct device_attribute afu_attrs[] = {
 	__ATTR_RO(mmio_size),
 	__ATTR_RO(irqs_min),
@@ -433,12 +469,7 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj,
 	struct afu_config_record *cr = to_cr(kobj);
 	struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj));
 
-	u64 i, j, val, size = afu->crs_len;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
+	u64 i, j, val;
 
 	for (i = 0; i < count;) {
 		val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7);
@@ -534,6 +565,10 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu)
 	struct afu_config_record *cr, *tmp;
 	int i;
 
+	/* remove the err buffer bin attribute */
+	if (afu->eb_len)
+		device_remove_bin_file(&afu->dev, &afu->attr_eb);
+
 	for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
 		device_remove_file(&afu->dev, &afu_attrs[i]);
 
@@ -555,6 +590,24 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu)
 			goto err;
 	}
 
+	/* conditionally create and add the binary file for the error info buffer */
+	if (afu->eb_len) {
+		sysfs_attr_init(&afu->attr_eb.attr);
+
+		afu->attr_eb.attr.name = "afu_err_buff";
+		afu->attr_eb.attr.mode = S_IRUGO;
+		afu->attr_eb.size = afu->eb_len;
+		afu->attr_eb.read = afu_eb_read;
+
+		rc = device_create_bin_file(&afu->dev, &afu->attr_eb);
+		if (rc) {
+			dev_err(&afu->dev,
+				"Unable to create eb attr for the afu. Err(%d)\n",
+				rc);
+			goto err;
+		}
+	}
+
 	for (i = 0; i < afu->crs_num; i++) {
 		cr = cxl_sysfs_afu_new_cr(afu, i);
 		if (IS_ERR(cr)) {
@@ -570,6 +623,9 @@ err1:
 	cxl_sysfs_afu_remove(afu);
 	return rc;
 err:
+	/* reset the eb_len as we haven't created the bin attr */
+	afu->eb_len = 0;
+
 	for (i--; i >= 0; i--)
 		device_remove_file(&afu->dev, &afu_attrs[i]);
 	return rc;
diff --git a/kernel/drivers/misc/cxl/trace.h b/kernel/drivers/misc/cxl/trace.h
index ae434d878..6e1e2adfb 100644
--- a/kernel/drivers/misc/cxl/trace.h
+++ b/kernel/drivers/misc/cxl/trace.h
@@ -105,7 +105,7 @@ TRACE_EVENT(cxl_attach,
 		__entry->num_interrupts = num_interrupts;
 	),
 
-	TP_printk("afu%i.%i pid=%i pe=%i wed=0x%.16llx irqs=%i amr=0x%llx",
+	TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx",
 		__entry->card,
 		__entry->afu,
 		__entry->pid,
@@ -177,7 +177,7 @@ TRACE_EVENT(cxl_psl_irq,
 		__entry->dar = dar;
 	),
 
-	TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%.16llx",
+	TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx",
 		__entry->card,
 		__entry->afu,
 		__entry->pe,
@@ -233,7 +233,7 @@ TRACE_EVENT(cxl_ste_miss,
 		__entry->dar = dar;
 	),
 
-	TP_printk("afu%i.%i pe=%i dar=0x%.16llx",
+	TP_printk("afu%i.%i pe=%i dar=0x%016llx",
 		__entry->card,
 		__entry->afu,
 		__entry->pe,
@@ -264,7 +264,7 @@ TRACE_EVENT(cxl_ste_write,
 		__entry->v = v;
 	),
 
-	TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%.16llx V=0x%.16llx",
+	TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx",
 		__entry->card,
 		__entry->afu,
 		__entry->pe,
@@ -295,7 +295,7 @@ TRACE_EVENT(cxl_pte_miss,
 		__entry->dar = dar;
 	),
 
-	TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%.16llx",
+	TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx",
 		__entry->card,
 		__entry->afu,
 		__entry->pe,
diff --git a/kernel/drivers/misc/cxl/vphb.c b/kernel/drivers/misc/cxl/vphb.c
new file mode 100644
index 000000000..cbd4331fb
--- /dev/null
+++ b/kernel/drivers/misc/cxl/vphb.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <misc/cxl.h>
+#include "cxl.h"
+
+static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	if (dma_mask < DMA_BIT_MASK(64)) {
+		pr_info("%s only 64bit DMA supported on CXL", __func__);
+		return -EIO;
+	}
+
+	*(pdev->dev.dma_mask) = dma_mask;
+	return 0;
+}
+
+static int cxl_pci_probe_mode(struct pci_bus *bus)
+{
+	return PCI_PROBE_NORMAL;
+}
+
+static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	return -ENODEV;
+}
+
+static void cxl_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	/*
+	 * MSI should never be set up, but we still need to provide this
+	 * callback.
+	 */
+}
+
+static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+	struct cxl_afu *afu;
+	struct cxl_context *ctx;
+
+	phb = pci_bus_to_host(dev->bus);
+	afu = (struct cxl_afu *)phb->private_data;
+
+	if (!cxl_adapter_link_ok(afu->adapter)) {
+		dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__);
+		return false;
+	}
+
+	set_dma_ops(&dev->dev, &dma_direct_ops);
+	set_dma_offset(&dev->dev, PAGE_OFFSET);
+
+	/*
+	 * Allocate a context to do cxl things too.  If we eventually do real
+	 * DMA ops, we'll need a default context to attach them to
+	 */
+	ctx = cxl_dev_context_init(dev);
+	if (!ctx)
+		return false;
+	dev->dev.archdata.cxl_ctx = ctx;
+
+	return (cxl_afu_check_and_enable(afu) == 0);
+}
+
+static void cxl_pci_disable_device(struct pci_dev *dev)
+{
+	struct cxl_context *ctx = cxl_get_context(dev);
+
+	if (ctx) {
+		if (ctx->status == STARTED) {
+			dev_err(&dev->dev, "Default context started\n");
+			return;
+		}
+		dev->dev.archdata.cxl_ctx = NULL;
+		cxl_release_context(ctx);
+	}
+}
+
+static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
+						unsigned long type)
+{
+	return 1;
+}
+
+static void cxl_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	/* Should we do an AFU reset here ? */
+}
+
+static int cxl_pcie_cfg_record(u8 bus, u8 devfn)
+{
+	return (bus << 8) + devfn;
+}
+
+static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb,
+				       u8 bus, u8 devfn, int offset)
+{
+	int record = cxl_pcie_cfg_record(bus, devfn);
+
+	return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset;
+}
+
+
+static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len,
+				volatile void __iomem **ioaddr,
+				u32 *mask, int *shift)
+{
+	struct pci_controller *phb;
+	struct cxl_afu *afu;
+	unsigned long addr;
+
+	phb = pci_bus_to_host(bus);
+	if (phb == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	afu = (struct cxl_afu *)phb->private_data;
+
+	if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (offset >= (unsigned long)phb->cfg_data)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset);
+
+	*ioaddr = (void *)(addr & ~0x3ULL);
+	*shift = ((addr & 0x3) * 8);
+	switch (len) {
+	case 1:
+		*mask = 0xff;
+		break;
+	case 2:
+		*mask = 0xffff;
+		break;
+	default:
+		*mask = 0xffffffff;
+		break;
+	}
+	return 0;
+}
+
+
+static inline bool cxl_config_link_ok(struct pci_bus *bus)
+{
+	struct pci_controller *phb;
+	struct cxl_afu *afu;
+
+	/* Config space IO is based on phb->cfg_addr, which is based on
+	 * afu_desc_mmio. This isn't safe to read/write when the link
+	 * goes down, as EEH tears down MMIO space.
+	 *
+	 * Check if the link is OK before proceeding.
+	 */
+
+	phb = pci_bus_to_host(bus);
+	if (phb == NULL)
+		return false;
+	afu = (struct cxl_afu *)phb->private_data;
+	return cxl_adapter_link_ok(afu->adapter);
+}
+
+static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+				int offset, int len, u32 *val)
+{
+	volatile void __iomem *ioaddr;
+	int shift, rc;
+	u32 mask;
+
+	rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+				  &mask, &shift);
+	if (rc)
+		return rc;
+
+	if (!cxl_config_link_ok(bus))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Can only read 32 bits */
+	*val = (in_le32(ioaddr) >> shift) & mask;
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+				 int offset, int len, u32 val)
+{
+	volatile void __iomem *ioaddr;
+	u32 v, mask;
+	int shift, rc;
+
+	rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr,
+				  &mask, &shift);
+	if (rc)
+		return rc;
+
+	if (!cxl_config_link_ok(bus))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Can only write 32 bits so do read-modify-write */
+	mask <<= shift;
+	val <<= shift;
+
+	v = (in_le32(ioaddr) & ~mask) | (val & mask);
+
+	out_le32(ioaddr, v);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops cxl_pcie_pci_ops =
+{
+	.read = cxl_pcie_read_config,
+	.write = cxl_pcie_write_config,
+};
+
+
+static struct pci_controller_ops cxl_pci_controller_ops =
+{
+	.probe_mode = cxl_pci_probe_mode,
+	.enable_device_hook = cxl_pci_enable_device_hook,
+	.disable_device = cxl_pci_disable_device,
+	.release_device = cxl_pci_disable_device,
+	.window_alignment = cxl_pci_window_alignment,
+	.reset_secondary_bus = cxl_pci_reset_secondary_bus,
+	.setup_msi_irqs = cxl_setup_msi_irqs,
+	.teardown_msi_irqs = cxl_teardown_msi_irqs,
+	.dma_set_mask = cxl_dma_set_mask,
+};
+
+int cxl_pci_vphb_add(struct cxl_afu *afu)
+{
+	struct pci_dev *phys_dev;
+	struct pci_controller *phb, *phys_phb;
+
+	phys_dev = to_pci_dev(afu->adapter->dev.parent);
+	phys_phb = pci_bus_to_host(phys_dev->bus);
+
+	/* Alloc and setup PHB data structure */
+	phb = pcibios_alloc_controller(phys_phb->dn);
+
+	if (!phb)
+		return -ENODEV;
+
+	/* Setup parent in sysfs */
+	phb->parent = &phys_dev->dev;
+
+	/* Setup the PHB using arch provided callback */
+	phb->ops = &cxl_pcie_pci_ops;
+	phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
+	phb->cfg_data = (void *)(u64)afu->crs_len;
+	phb->private_data = afu;
+	phb->controller_ops = cxl_pci_controller_ops;
+
+	/* Scan the bus */
+	pcibios_scan_phb(phb);
+	if (phb->bus == NULL)
+		return -ENXIO;
+
+	/* Claim resources. This might need some rework as well, depending
+	 * on whether we are doing probe-only or not, like assigning unassigned
+	 * resources etc...
+	 */
+	pcibios_claim_one_bus(phb->bus);
+
+	/* Add probed PCI devices to the device model */
+	pci_bus_add_devices(phb->bus);
+
+	afu->phb = phb;
+
+	return 0;
+}
+
+void cxl_pci_vphb_reconfigure(struct cxl_afu *afu)
+{
+	/* When we are reconfigured, the AFU's MMIO space is unmapped
+	 * and remapped. We need to reflect this in the PHB's view of
+	 * the world.
+	 */
+	afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset;
+}
+
+void cxl_pci_vphb_remove(struct cxl_afu *afu)
+{
+	struct pci_controller *phb;
+
+	/* If there is no configuration record we won't have one of these */
+	if (!afu || !afu->phb)
+		return;
+
+	phb = afu->phb;
+	afu->phb = NULL;
+
+	pci_remove_root_bus(phb->bus);
+	pcibios_free_controller(phb);
+}
+
+struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+
+	phb = pci_bus_to_host(dev->bus);
+
+	return (struct cxl_afu *)phb->private_data;
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_afu);
+
+unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev)
+{
+	return cxl_pcie_cfg_record(dev->bus->number, dev->devfn);
+}
+EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record);
diff --git a/kernel/drivers/misc/ds1682.c b/kernel/drivers/misc/ds1682.c
index b909fb302..c7112276a 100644
--- a/kernel/drivers/misc/ds1682.c
+++ b/kernel/drivers/misc/ds1682.c
@@ -148,12 +148,6 @@ static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj,
 	dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n",
 		buf, off, count);
 
-	if (off >= DS1682_EEPROM_SIZE)
-		return 0;
-
-	if (off + count > DS1682_EEPROM_SIZE)
-		count = DS1682_EEPROM_SIZE - off;
-
 	rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off,
 					   count, buf);
 	if (rc < 0)
@@ -171,12 +165,6 @@ static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj,
 	dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n",
 		buf, off, count);
 
-	if (off >= DS1682_EEPROM_SIZE)
-		return -ENOSPC;
-
-	if (off + count > DS1682_EEPROM_SIZE)
-		count = DS1682_EEPROM_SIZE - off;
-
 	/* Write out to the device */
 	if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off,
 					   count, buf) < 0)
diff --git a/kernel/drivers/misc/eeprom/Kconfig b/kernel/drivers/misc/eeprom/Kconfig
index 9536852fd..04f2e1fa9 100644
--- a/kernel/drivers/misc/eeprom/Kconfig
+++ b/kernel/drivers/misc/eeprom/Kconfig
@@ -96,17 +96,4 @@ config EEPROM_DIGSY_MTC_CFG
 
 	  If unsure, say N.
 
-config EEPROM_SUNXI_SID
-	tristate "Allwinner sunxi security ID support"
-	depends on ARCH_SUNXI && SYSFS
-	help
-	  This is a driver for the 'security ID' available on various Allwinner
-	  devices.
-
-	  Due to the potential risks involved with changing e-fuses,
-	  this driver is read-only.
-
-	  This driver can also be built as a module. If so, the module
-	  will be called sunxi_sid.
-
 endmenu
diff --git a/kernel/drivers/misc/eeprom/Makefile b/kernel/drivers/misc/eeprom/Makefile
index 9507aec95..fc1e81d29 100644
--- a/kernel/drivers/misc/eeprom/Makefile
+++ b/kernel/drivers/misc/eeprom/Makefile
@@ -4,5 +4,4 @@ obj-$(CONFIG_EEPROM_LEGACY)	+= eeprom.o
 obj-$(CONFIG_EEPROM_MAX6875)	+= max6875.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
 obj-$(CONFIG_EEPROM_93XX46)	+= eeprom_93xx46.o
-obj-$(CONFIG_EEPROM_SUNXI_SID)	+= sunxi_sid.o
 obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o
diff --git a/kernel/drivers/misc/eeprom/at24.c b/kernel/drivers/misc/eeprom/at24.c
index 2d3db81be..5d7c0900f 100644
--- a/kernel/drivers/misc/eeprom/at24.c
+++ b/kernel/drivers/misc/eeprom/at24.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/jiffies.h>
 #include <linux/of.h>
+#include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/platform_data/at24.h>
 
@@ -131,6 +132,12 @@ static const struct i2c_device_id at24_ids[] = {
 };
 MODULE_DEVICE_TABLE(i2c, at24_ids);
 
+static const struct acpi_device_id at24_acpi_ids[] = {
+	{ "INT3499", AT24_DEVICE_MAGIC(8192 / 8, 0) },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, at24_acpi_ids);
+
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -186,19 +193,11 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
 	if (count > io_limit)
 		count = io_limit;
 
-	switch (at24->use_smbus) {
-	case I2C_SMBUS_I2C_BLOCK_DATA:
+	if (at24->use_smbus) {
 		/* Smaller eeproms can work given some SMBus extension calls */
 		if (count > I2C_SMBUS_BLOCK_MAX)
 			count = I2C_SMBUS_BLOCK_MAX;
-		break;
-	case I2C_SMBUS_WORD_DATA:
-		count = 2;
-		break;
-	case I2C_SMBUS_BYTE_DATA:
-		count = 1;
-		break;
-	default:
+	} else {
 		/*
 		 * When we have a better choice than SMBus calls, use a
 		 * combined I2C message. Write address; then read up to
@@ -229,27 +228,10 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
 	timeout = jiffies + msecs_to_jiffies(write_timeout);
 	do {
 		read_time = jiffies;
-		switch (at24->use_smbus) {
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			status = i2c_smbus_read_i2c_block_data(client, offset,
-					count, buf);
-			break;
-		case I2C_SMBUS_WORD_DATA:
-			status = i2c_smbus_read_word_data(client, offset);
-			if (status >= 0) {
-				buf[0] = status & 0xff;
-				buf[1] = status >> 8;
-				status = count;
-			}
-			break;
-		case I2C_SMBUS_BYTE_DATA:
-			status = i2c_smbus_read_byte_data(client, offset);
-			if (status >= 0) {
-				buf[0] = status;
-				status = count;
-			}
-			break;
-		default:
+		if (at24->use_smbus) {
+			status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset,
+									   count, buf);
+		} else {
 			status = i2c_transfer(client->adapter, msg, 2);
 			if (status == 2)
 				status = count;
@@ -438,9 +420,6 @@ static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
 {
 	struct at24_data *at24;
 
-	if (unlikely(off >= attr->size))
-		return -EFBIG;
-
 	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
 	return at24_write(at24, buf, off, count);
 }
@@ -495,21 +474,29 @@ static void at24_get_ofdata(struct i2c_client *client,
 static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct at24_platform_data chip;
+	kernel_ulong_t magic = 0;
 	bool writable;
 	int use_smbus = 0;
 	int use_smbus_write = 0;
 	struct at24_data *at24;
 	int err;
 	unsigned i, num_addresses;
-	kernel_ulong_t magic;
 
 	if (client->dev.platform_data) {
 		chip = *(struct at24_platform_data *)client->dev.platform_data;
 	} else {
-		if (!id->driver_data)
+		if (id) {
+			magic = id->driver_data;
+		} else {
+			const struct acpi_device_id *aid;
+
+			aid = acpi_match_device(at24_acpi_ids, &client->dev);
+			if (aid)
+				magic = aid->driver_data;
+		}
+		if (!magic)
 			return -ENODEV;
 
-		magic = id->driver_data;
 		chip.byte_len = BIT(magic & AT24_BITMASK(AT24_SIZE_BYTELEN));
 		magic >>= AT24_SIZE_BYTELEN;
 		chip.flags = magic & AT24_BITMASK(AT24_SIZE_FLAGS);
@@ -689,7 +676,7 @@ static int at24_remove(struct i2c_client *client)
 static struct i2c_driver at24_driver = {
 	.driver = {
 		.name = "at24",
-		.owner = THIS_MODULE,
+		.acpi_match_table = ACPI_PTR(at24_acpi_ids),
 	},
 	.probe = at24_probe,
 	.remove = at24_remove,
diff --git a/kernel/drivers/misc/eeprom/at25.c b/kernel/drivers/misc/eeprom/at25.c
index 0a1af93ec..f850ef556 100644
--- a/kernel/drivers/misc/eeprom/at25.c
+++ b/kernel/drivers/misc/eeprom/at25.c
@@ -462,7 +462,6 @@ MODULE_DEVICE_TABLE(of, at25_of_match);
 static struct spi_driver at25_driver = {
 	.driver = {
 		.name		= "at25",
-		.owner		= THIS_MODULE,
 		.of_match_table = at25_of_match,
 	},
 	.probe		= at25_probe,
diff --git a/kernel/drivers/misc/eeprom/eeprom.c b/kernel/drivers/misc/eeprom/eeprom.c
index b432873de..7342fd637 100644
--- a/kernel/drivers/misc/eeprom/eeprom.c
+++ b/kernel/drivers/misc/eeprom/eeprom.c
@@ -88,11 +88,6 @@ static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
 	struct eeprom_data *data = i2c_get_clientdata(client);
 	u8 slice;
 
-	if (off > EEPROM_SIZE)
-		return 0;
-	if (off + count > EEPROM_SIZE)
-		count = EEPROM_SIZE - off;
-
 	/* Only refresh slices which contain requested bytes */
 	for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++)
 		eeprom_update_client(client, slice);
diff --git a/kernel/drivers/misc/eeprom/eeprom_93xx46.c b/kernel/drivers/misc/eeprom/eeprom_93xx46.c
index 9ebeacdb8..ff63f05ed 100644
--- a/kernel/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/kernel/drivers/misc/eeprom/eeprom_93xx46.c
@@ -48,13 +48,6 @@ eeprom_93xx46_bin_read(struct file *filp, struct kobject *kobj,
 	dev = container_of(kobj, struct device, kobj);
 	edev = dev_get_drvdata(dev);
 
-	if (unlikely(off >= edev->bin.size))
-		return 0;
-	if ((off + count) > edev->bin.size)
-		count = edev->bin.size - off;
-	if (unlikely(!count))
-		return count;
-
 	cmd_addr = OP_READ << edev->addrlen;
 
 	if (edev->addrlen == 7) {
@@ -200,13 +193,6 @@ eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj,
 	dev = container_of(kobj, struct device, kobj);
 	edev = dev_get_drvdata(dev);
 
-	if (unlikely(off >= edev->bin.size))
-		return -EFBIG;
-	if ((off + count) > edev->bin.size)
-		count = edev->bin.size - off;
-	if (unlikely(!count))
-		return count;
-
 	/* only write even number of bytes on 16-bit devices */
 	if (edev->addrlen == 6) {
 		step = 2;
@@ -384,7 +370,6 @@ static int eeprom_93xx46_remove(struct spi_device *spi)
 static struct spi_driver eeprom_93xx46_driver = {
 	.driver = {
 		.name	= "93xx46",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= eeprom_93xx46_probe,
 	.remove		= eeprom_93xx46_remove,
diff --git a/kernel/drivers/misc/eeprom/max6875.c b/kernel/drivers/misc/eeprom/max6875.c
index 580ff9df5..e4dd93b25 100644
--- a/kernel/drivers/misc/eeprom/max6875.c
+++ b/kernel/drivers/misc/eeprom/max6875.c
@@ -114,12 +114,6 @@ static ssize_t max6875_read(struct file *filp, struct kobject *kobj,
 	struct max6875_data *data = i2c_get_clientdata(client);
 	int slice, max_slice;
 
-	if (off > USER_EEPROM_SIZE)
-		return 0;
-
-	if (off + count > USER_EEPROM_SIZE)
-		count = USER_EEPROM_SIZE - off;
-
 	/* refresh slices which contain requested bytes */
 	max_slice = (off + count - 1) >> SLICE_BITS;
 	for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++)
@@ -197,6 +191,7 @@ static const struct i2c_device_id max6875_id[] = {
 	{ "max6875", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(i2c, max6875_id);
 
 static struct i2c_driver max6875_driver = {
 	.driver = {
diff --git a/kernel/drivers/misc/eeprom/sunxi_sid.c b/kernel/drivers/misc/eeprom/sunxi_sid.c
deleted file mode 100644
index 8385177ff..000000000
--- a/kernel/drivers/misc/eeprom/sunxi_sid.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2013 Oliver Schinagl <oliver@schinagl.nl>
- * http://www.linux-sunxi.org
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver exposes the Allwinner security ID, efuses exported in byte-
- * sized chunks.
- */
-
-#include <linux/compiler.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/kobject.h>
-#include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-
-#define DRV_NAME "sunxi-sid"
-
-struct sunxi_sid_data {
-	void __iomem *reg_base;
-	unsigned int keysize;
-};
-
-/* We read the entire key, due to a 32 bit read alignment requirement. Since we
- * want to return the requested byte, this results in somewhat slower code and
- * uses 4 times more reads as needed but keeps code simpler. Since the SID is
- * only very rarely probed, this is not really an issue.
- */
-static u8 sunxi_sid_read_byte(const struct sunxi_sid_data *sid_data,
-			      const unsigned int offset)
-{
-	u32 sid_key;
-
-	if (offset >= sid_data->keysize)
-		return 0;
-
-	sid_key = ioread32be(sid_data->reg_base + round_down(offset, 4));
-	sid_key >>= (offset % 4) * 8;
-
-	return sid_key; /* Only return the last byte */
-}
-
-static ssize_t sid_read(struct file *fd, struct kobject *kobj,
-			struct bin_attribute *attr, char *buf,
-			loff_t pos, size_t size)
-{
-	struct platform_device *pdev;
-	struct sunxi_sid_data *sid_data;
-	int i;
-
-	pdev = to_platform_device(kobj_to_dev(kobj));
-	sid_data = platform_get_drvdata(pdev);
-
-	if (pos < 0 || pos >= sid_data->keysize)
-		return 0;
-	if (size > sid_data->keysize - pos)
-		size = sid_data->keysize - pos;
-
-	for (i = 0; i < size; i++)
-		buf[i] = sunxi_sid_read_byte(sid_data, pos + i);
-
-	return i;
-}
-
-static struct bin_attribute sid_bin_attr = {
-	.attr = { .name = "eeprom", .mode = S_IRUGO, },
-	.read = sid_read,
-};
-
-static int sunxi_sid_remove(struct platform_device *pdev)
-{
-	device_remove_bin_file(&pdev->dev, &sid_bin_attr);
-	dev_dbg(&pdev->dev, "driver unloaded\n");
-
-	return 0;
-}
-
-static const struct of_device_id sunxi_sid_of_match[] = {
-	{ .compatible = "allwinner,sun4i-a10-sid", .data = (void *)16},
-	{ .compatible = "allwinner,sun7i-a20-sid", .data = (void *)512},
-	{/* sentinel */},
-};
-MODULE_DEVICE_TABLE(of, sunxi_sid_of_match);
-
-static int sunxi_sid_probe(struct platform_device *pdev)
-{
-	struct sunxi_sid_data *sid_data;
-	struct resource *res;
-	const struct of_device_id *of_dev_id;
-	u8 *entropy;
-	unsigned int i;
-
-	sid_data = devm_kzalloc(&pdev->dev, sizeof(struct sunxi_sid_data),
-				GFP_KERNEL);
-	if (!sid_data)
-		return -ENOMEM;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sid_data->reg_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(sid_data->reg_base))
-		return PTR_ERR(sid_data->reg_base);
-
-	of_dev_id = of_match_device(sunxi_sid_of_match, &pdev->dev);
-	if (!of_dev_id)
-		return -ENODEV;
-	sid_data->keysize = (int)of_dev_id->data;
-
-	platform_set_drvdata(pdev, sid_data);
-
-	sid_bin_attr.size = sid_data->keysize;
-	if (device_create_bin_file(&pdev->dev, &sid_bin_attr))
-		return -ENODEV;
-
-	entropy = kzalloc(sizeof(u8) * sid_data->keysize, GFP_KERNEL);
-	for (i = 0; i < sid_data->keysize; i++)
-		entropy[i] = sunxi_sid_read_byte(sid_data, i);
-	add_device_randomness(entropy, sid_data->keysize);
-	kfree(entropy);
-
-	dev_dbg(&pdev->dev, "loaded\n");
-
-	return 0;
-}
-
-static struct platform_driver sunxi_sid_driver = {
-	.probe = sunxi_sid_probe,
-	.remove = sunxi_sid_remove,
-	.driver = {
-		.name = DRV_NAME,
-		.of_match_table = sunxi_sid_of_match,
-	},
-};
-module_platform_driver(sunxi_sid_driver);
-
-MODULE_AUTHOR("Oliver Schinagl <oliver@schinagl.nl>");
-MODULE_DESCRIPTION("Allwinner sunxi security id driver");
-MODULE_LICENSE("GPL");
diff --git a/kernel/drivers/misc/genwqe/card_base.h b/kernel/drivers/misc/genwqe/card_base.h
index e73534498..cb851c14c 100644
--- a/kernel/drivers/misc/genwqe/card_base.h
+++ b/kernel/drivers/misc/genwqe/card_base.h
@@ -514,7 +514,7 @@ int  __genwqe_execute_ddcb(struct genwqe_dev *cd,
 /**
  * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
  *
- * This version will not do address translation or any modifcation of
+ * This version will not do address translation or any modification of
  * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
  * entirely prepared by the driver itself. That means the appropriate
  * DMA addresses are already in the DDCB and do not need any
diff --git a/kernel/drivers/misc/genwqe/card_ddcb.c b/kernel/drivers/misc/genwqe/card_ddcb.c
index 6d51e5f08..353ee0cc7 100644
--- a/kernel/drivers/misc/genwqe/card_ddcb.c
+++ b/kernel/drivers/misc/genwqe/card_ddcb.c
@@ -203,7 +203,7 @@ struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
 {
 	struct ddcb_requ *req;
 
-	req = kzalloc(sizeof(*req), GFP_ATOMIC);
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req)
 		return NULL;
 
diff --git a/kernel/drivers/misc/genwqe/card_dev.c b/kernel/drivers/misc/genwqe/card_dev.c
index c49d24426..7f1b282d7 100644
--- a/kernel/drivers/misc/genwqe/card_dev.c
+++ b/kernel/drivers/misc/genwqe/card_dev.c
@@ -418,7 +418,7 @@ static void genwqe_vma_close(struct vm_area_struct *vma)
 	kfree(dma_map);
 }
 
-static struct vm_operations_struct genwqe_vma_ops = {
+static const struct vm_operations_struct genwqe_vma_ops = {
 	.open   = genwqe_vma_open,
 	.close  = genwqe_vma_close,
 };
@@ -449,7 +449,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (get_order(vsize) > MAX_ORDER)
 		return -ENOMEM;
 
-	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
 	if (dma_map == NULL)
 		return -ENOMEM;
 
@@ -785,7 +785,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
 	map_addr = (m->addr & PAGE_MASK);
 	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
 
-	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
 	if (dma_map == NULL)
 		return -ENOMEM;
 
diff --git a/kernel/drivers/misc/genwqe/card_utils.c b/kernel/drivers/misc/genwqe/card_utils.c
index 1ca94e6fa..222367cc8 100644
--- a/kernel/drivers/misc/genwqe/card_utils.c
+++ b/kernel/drivers/misc/genwqe/card_utils.c
@@ -220,7 +220,8 @@ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
 	if (get_order(size) > MAX_ORDER)
 		return NULL;
 
-	return pci_alloc_consistent(cd->pci_dev, size, dma_handle);
+	return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+				  GFP_KERNEL);
 }
 
 void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
@@ -229,7 +230,7 @@ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
 	if (vaddr == NULL)
 		return;
 
-	pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle);
+	dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
 }
 
 static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
diff --git a/kernel/drivers/misc/hpilo.c b/kernel/drivers/misc/hpilo.c
index b83e3ca12..d6a901cd4 100644
--- a/kernel/drivers/misc/hpilo.c
+++ b/kernel/drivers/misc/hpilo.c
@@ -2,7 +2,7 @@
  * Driver for the HP iLO management processor.
  *
  * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
- *	David Altobelli <david.altobelli@hp.com>
+ *	David Altobelli <david.altobelli@hpe.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -902,11 +902,11 @@ static void __exit ilo_exit(void)
 MODULE_VERSION("1.4.1");
 MODULE_ALIAS(ILO_NAME);
 MODULE_DESCRIPTION(ILO_NAME);
-MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
+MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
 MODULE_LICENSE("GPL v2");
 
 module_param(max_ccb, uint, 0444);
-MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (16)");
+MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)");
 
 module_init(ilo_init);
 module_exit(ilo_exit);
diff --git a/kernel/drivers/misc/hwlat_detector.c b/kernel/drivers/misc/hwlat_detector.c
index 2429c4331..52f5ad5fd 100644
--- a/kernel/drivers/misc/hwlat_detector.c
+++ b/kernel/drivers/misc/hwlat_detector.c
@@ -616,7 +616,7 @@ static ssize_t  debug_enable_fwrite(struct file *filp,
 
 	buf[sizeof(buf)-1] = '\0';			/* just in case */
 	err = kstrtoul(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	if (val) {
@@ -921,7 +921,7 @@ static ssize_t  debug_width_fwrite(struct file *filp,
 
 	buf[U64STR_SIZE-1] = '\0';			/* just in case */
 	err = kstrtoull(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	mutex_lock(&data.lock);
@@ -1005,7 +1005,7 @@ static ssize_t  debug_window_fwrite(struct file *filp,
 
 	buf[U64STR_SIZE-1] = '\0';			/* just in case */
 	err = kstrtoull(buf, 10, &val);
-	if (0 != err)
+	if (err)
 		return -EINVAL;
 
 	mutex_lock(&data.lock);
@@ -1198,11 +1198,11 @@ static int detector_init(void)
 	pr_info(BANNER "version %s\n", VERSION);
 
 	ret = init_stats();
-	if (0 != ret)
+	if (ret)
 		goto out;
 
 	ret = init_debugfs();
-	if (0 != ret)
+	if (ret)
 		goto err_stats;
 
 	if (enabled)
diff --git a/kernel/drivers/misc/isl29003.c b/kernel/drivers/misc/isl29003.c
index 12c30b486..976df0013 100644
--- a/kernel/drivers/misc/isl29003.c
+++ b/kernel/drivers/misc/isl29003.c
@@ -465,7 +465,6 @@ MODULE_DEVICE_TABLE(i2c, isl29003_id);
 static struct i2c_driver isl29003_driver = {
 	.driver = {
 		.name	= ISL29003_DRV_NAME,
-		.owner	= THIS_MODULE,
 		.pm	= ISL29003_PM_OPS,
 	},
 	.probe	= isl29003_probe,
diff --git a/kernel/drivers/misc/kgdbts.c b/kernel/drivers/misc/kgdbts.c
index 36f5d5277..99635dd9d 100644
--- a/kernel/drivers/misc/kgdbts.c
+++ b/kernel/drivers/misc/kgdbts.c
@@ -220,7 +220,7 @@ static unsigned long lookup_addr(char *arg)
 	else if (!strcmp(arg, "sys_open"))
 		addr = (unsigned long)do_sys_open;
 	else if (!strcmp(arg, "do_fork"))
-		addr = (unsigned long)do_fork;
+		addr = (unsigned long)_do_fork;
 	else if (!strcmp(arg, "hw_break_val"))
 		addr = (unsigned long)&hw_break_val;
 	addr = (unsigned long) dereference_function_descriptor((void *)addr);
@@ -1112,6 +1112,7 @@ static int __init init_kgdbts(void)
 
 	return configure_kgdbts();
 }
+device_initcall(init_kgdbts);
 
 static int kgdbts_get_char(void)
 {
@@ -1180,10 +1181,9 @@ static struct kgdb_io kgdbts_io_ops = {
 	.post_exception		= kgdbts_post_exp_handler,
 };
 
-module_init(init_kgdbts);
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
 module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
 MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
-MODULE_DESCRIPTION("KGDB Test Suite");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Wind River Systems, Inc.");
-
diff --git a/kernel/drivers/misc/lattice-ecp3-config.c b/kernel/drivers/misc/lattice-ecp3-config.c
index c544f1f50..626fdcaf2 100644
--- a/kernel/drivers/misc/lattice-ecp3-config.c
+++ b/kernel/drivers/misc/lattice-ecp3-config.c
@@ -235,7 +235,6 @@ MODULE_DEVICE_TABLE(spi, lattice_ecp3_id);
 static struct spi_driver lattice_ecp3_driver = {
 	.driver = {
 		.name = "lattice-ecp3",
-		.owner = THIS_MODULE,
 	},
 	.probe = lattice_ecp3_probe,
 	.remove = lattice_ecp3_remove,
diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c
index 4739689d2..fb8705fc3 100644
--- a/kernel/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -115,7 +115,7 @@ static int param_set_axis(const char *val, const struct kernel_param *kp)
 	return ret;
 }
 
-static struct kernel_param_ops param_ops_axis = {
+static const struct kernel_param_ops param_ops_axis = {
 	.set = param_set_axis,
 	.get = param_get_int,
 };
diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
index e3e7f1dc2..0c3bb7e3e 100644
--- a/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
+++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -274,7 +274,6 @@ static const struct dev_pm_ops lis3_pm_ops = {
 static struct i2c_driver lis3lv02d_i2c_driver = {
 	.driver	 = {
 		.name   = DRV_NAME,
-		.owner  = THIS_MODULE,
 		.pm     = &lis3_pm_ops,
 		.of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids),
 	},
diff --git a/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c
index b2f6e1651..e57547512 100644
--- a/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c
+++ b/kernel/drivers/misc/lis3lv02d/lis3lv02d_spi.c
@@ -138,7 +138,6 @@ static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend,
 static struct spi_driver lis302dl_spi_driver = {
 	.driver	 = {
 		.name   = DRV_NAME,
-		.owner  = THIS_MODULE,
 		.pm	= &lis3lv02d_spi_pm,
 		.of_match_table = of_match_ptr(lis302dl_spi_dt_ids),
 	},
diff --git a/kernel/drivers/misc/lkdtm.c b/kernel/drivers/misc/lkdtm.c
index b5abe3412..11fdadc68 100644
--- a/kernel/drivers/misc/lkdtm.c
+++ b/kernel/drivers/misc/lkdtm.c
@@ -472,7 +472,7 @@ static void lkdtm_do_action(enum ctype which)
 		break;
 	}
 	case CT_ACCESS_USERSPACE: {
-		unsigned long user_addr, tmp;
+		unsigned long user_addr, tmp = 0;
 		unsigned long *ptr;
 
 		user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
@@ -483,6 +483,12 @@ static void lkdtm_do_action(enum ctype which)
 			return;
 		}
 
+		if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) {
+			pr_warn("copy_to_user failed\n");
+			vm_munmap(user_addr, PAGE_SIZE);
+			return;
+		}
+
 		ptr = (unsigned long *)user_addr;
 
 		pr_info("attempting bad read at %p\n", ptr);
diff --git a/kernel/drivers/misc/mei/Makefile b/kernel/drivers/misc/mei/Makefile
index 518914a82..01447ca21 100644
--- a/kernel/drivers/misc/mei/Makefile
+++ b/kernel/drivers/misc/mei/Makefile
@@ -11,7 +11,7 @@ mei-objs += main.o
 mei-objs += amthif.o
 mei-objs += wd.o
 mei-objs += bus.o
-mei-objs += nfc.o
+mei-objs += bus-fixup.o
 mei-$(CONFIG_DEBUG_FS) += debugfs.o
 
 obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o
diff --git a/kernel/drivers/misc/mei/amthif.c b/kernel/drivers/misc/mei/amthif.c
index d2cd53e3f..cd0403f09 100644
--- a/kernel/drivers/misc/mei/amthif.c
+++ b/kernel/drivers/misc/mei/amthif.c
@@ -59,46 +59,29 @@ void mei_amthif_reset_params(struct mei_device *dev)
  * mei_amthif_host_init - mei initialization amthif client.
  *
  * @dev: the device structure
+ * @me_cl: me client
  *
  * Return: 0 on success, <0 on failure.
  */
-int mei_amthif_host_init(struct mei_device *dev)
+int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
 {
 	struct mei_cl *cl = &dev->iamthif_cl;
-	struct mei_me_client *me_cl;
 	int ret;
 
 	dev->iamthif_state = MEI_IAMTHIF_IDLE;
 
 	mei_cl_init(cl, dev);
 
-	me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid);
-	if (!me_cl) {
-		dev_info(dev->dev, "amthif: failed to find the client");
-		return -ENOTTY;
-	}
-
-	cl->me_client_id = me_cl->client_id;
-	cl->cl_uuid = me_cl->props.protocol_name;
-
-	/* Assign iamthif_mtu to the value received from ME  */
-
-	dev->iamthif_mtu = me_cl->props.max_msg_length;
-	dev_dbg(dev->dev, "IAMTHIF_MTU = %d\n", dev->iamthif_mtu);
-
-
 	ret = mei_cl_link(cl, MEI_IAMTHIF_HOST_CLIENT_ID);
 	if (ret < 0) {
 		dev_err(dev->dev, "amthif: failed cl_link %d\n", ret);
-		goto out;
+		return ret;
 	}
 
-	ret = mei_cl_connect(cl, NULL);
+	ret = mei_cl_connect(cl, me_cl, NULL);
 
 	dev->iamthif_state = MEI_IAMTHIF_IDLE;
 
-out:
-	mei_me_cl_put(me_cl);
 	return ret;
 }
 
@@ -250,7 +233,6 @@ static int mei_amthif_read_start(struct mei_cl *cl, struct file *file)
 {
 	struct mei_device *dev = cl->dev;
 	struct mei_cl_cb *cb;
-	size_t length = dev->iamthif_mtu;
 	int rets;
 
 	cb = mei_io_cb_init(cl, MEI_FOP_READ, file);
@@ -259,7 +241,7 @@ static int mei_amthif_read_start(struct mei_cl *cl, struct file *file)
 		goto err;
 	}
 
-	rets = mei_io_cb_alloc_buf(cb, length);
+	rets = mei_io_cb_alloc_buf(cb, mei_cl_mtu(cl));
 	if (rets)
 		goto err;
 
@@ -476,7 +458,7 @@ void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb)
 		return;
 	}
 
-	if (dev->iamthif_canceled != 1) {
+	if (!dev->iamthif_canceled) {
 		dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
 		dev->iamthif_stall_timer = 0;
 		list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list);
diff --git a/kernel/drivers/misc/mei/bus-fixup.c b/kernel/drivers/misc/mei/bus-fixup.c
new file mode 100644
index 000000000..020de5919
--- /dev/null
+++ b/kernel/drivers/misc/mei/bus-fixup.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Copyright (c) 2003-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+
+#include <linux/mei_cl_bus.h>
+
+#include "mei_dev.h"
+#include "client.h"
+
+#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \
+			0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06)
+
+static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO;
+
+#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \
+			0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c)
+
+#define MEI_UUID_ANY NULL_UUID_LE
+
+/**
+ * number_of_connections - determine whether a client may be on the bus
+ *    according to its number of connections
+ *    We support only clients:
+ *       1. with single connection
+ *       2. and fixed clients (max_number_of_connections == 0)
+ *
+ * @cldev: me clients device
+ */
+static void number_of_connections(struct mei_cl_device *cldev)
+{
+	dev_dbg(&cldev->dev, "running hook %s on %pUl\n",
+			__func__, mei_me_cl_uuid(cldev->me_cl));
+
+	if (cldev->me_cl->props.max_number_of_connections > 1)
+		cldev->do_match = 0;
+}
+
+/**
+ * blacklist - blacklist a client from the bus
+ *
+ * @cldev: me clients device
+ */
+static void blacklist(struct mei_cl_device *cldev)
+{
+	dev_dbg(&cldev->dev, "running hook %s on %pUl\n",
+			__func__, mei_me_cl_uuid(cldev->me_cl));
+	cldev->do_match = 0;
+}
+
+struct mei_nfc_cmd {
+	u8 command;
+	u8 status;
+	u16 req_id;
+	u32 reserved;
+	u16 data_size;
+	u8 sub_command;
+	u8 data[];
+} __packed;
+
+struct mei_nfc_reply {
+	u8 command;
+	u8 status;
+	u16 req_id;
+	u32 reserved;
+	u16 data_size;
+	u8 sub_command;
+	u8 reply_status;
+	u8 data[];
+} __packed;
+
+struct mei_nfc_if_version {
+	u8 radio_version_sw[3];
+	u8 reserved[3];
+	u8 radio_version_hw[3];
+	u8 i2c_addr;
+	u8 fw_ivn;
+	u8 vendor_id;
+	u8 radio_type;
+} __packed;
+
+
+#define MEI_NFC_CMD_MAINTENANCE 0x00
+#define MEI_NFC_SUBCMD_IF_VERSION 0x01
+
+/* Vendors */
+#define MEI_NFC_VENDOR_INSIDE 0x00
+#define MEI_NFC_VENDOR_NXP    0x01
+
+/* Radio types */
+#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00
+#define MEI_NFC_VENDOR_NXP_PN544    0x01
+
+/**
+ * mei_nfc_if_version - get NFC interface version
+ *
+ * @cl: host client (nfc info)
+ * @ver: NFC interface version to be filled in
+ *
+ * Return: 0 on success; < 0 otherwise
+ */
+static int mei_nfc_if_version(struct mei_cl *cl,
+			      struct mei_nfc_if_version *ver)
+{
+	struct mei_device *bus;
+	struct mei_nfc_cmd cmd = {
+		.command = MEI_NFC_CMD_MAINTENANCE,
+		.data_size = 1,
+		.sub_command = MEI_NFC_SUBCMD_IF_VERSION,
+	};
+	struct mei_nfc_reply *reply = NULL;
+	size_t if_version_length;
+	int bytes_recv, ret;
+
+	bus = cl->dev;
+
+	WARN_ON(mutex_is_locked(&bus->device_lock));
+
+	ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd), 1);
+	if (ret < 0) {
+		dev_err(bus->dev, "Could not send IF version cmd\n");
+		return ret;
+	}
+
+	/* allocate the reply on the heap to keep the buffer off the stack */
+	if_version_length = sizeof(struct mei_nfc_reply) +
+		sizeof(struct mei_nfc_if_version);
+
+	reply = kzalloc(if_version_length, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	ret = 0;
+	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
+	if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
+		dev_err(bus->dev, "Could not read IF version\n");
+		ret = -EIO;
+		goto err;
+	}
+
+	memcpy(ver, reply->data, sizeof(struct mei_nfc_if_version));
+
+	dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n",
+		ver->fw_ivn, ver->vendor_id, ver->radio_type);
+
+err:
+	kfree(reply);
+	return ret;
+}
+
+/**
+ * mei_nfc_radio_name - derive nfc radio name from the interface version
+ *
+ * @ver: NFC radio version
+ *
+ * Return: radio name string
+ */
+static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver)
+{
+
+	if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) {
+		if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD)
+			return "microread";
+	}
+
+	if (ver->vendor_id == MEI_NFC_VENDOR_NXP) {
+		if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544)
+			return "pn544";
+	}
+
+	return NULL;
+}
+
+/**
+ * mei_nfc - The nfc fixup function. The function retrieves nfc radio
+ *    name and sets it as a device attribute so we can load
+ *    the proper device driver for it
+ *
+ * @cldev: me client device (nfc)
+ */
+static void mei_nfc(struct mei_cl_device *cldev)
+{
+	struct mei_device *bus;
+	struct mei_cl *cl;
+	struct mei_me_client *me_cl = NULL;
+	struct mei_nfc_if_version ver;
+	const char *radio_name = NULL;
+	int ret;
+
+	bus = cldev->bus;
+
+	dev_dbg(bus->dev, "running hook %s: %pUl match=%d\n",
+		__func__, mei_me_cl_uuid(cldev->me_cl), cldev->do_match);
+
+	mutex_lock(&bus->device_lock);
+	/* we need to connect to INFO GUID */
+	cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY);
+	if (IS_ERR(cl)) {
+		ret = PTR_ERR(cl);
+		cl = NULL;
+		dev_err(bus->dev, "nfc hook alloc failed %d\n", ret);
+		goto out;
+	}
+
+	me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid);
+	if (!me_cl) {
+		ret = -ENOTTY;
+		dev_err(bus->dev, "Cannot find nfc info %d\n", ret);
+		goto out;
+	}
+
+	ret = mei_cl_connect(cl, me_cl, NULL);
+	if (ret < 0) {
+		dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n",
+			ret);
+		goto out;
+	}
+
+	mutex_unlock(&bus->device_lock);
+
+	ret = mei_nfc_if_version(cl, &ver);
+	if (ret)
+		goto disconnect;
+
+	radio_name = mei_nfc_radio_name(&ver);
+
+	if (!radio_name) {
+		ret = -ENOENT;
+		dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n",
+			ret);
+		goto disconnect;
+	}
+
+	dev_dbg(bus->dev, "nfc radio %s\n", radio_name);
+	strlcpy(cldev->name, radio_name, sizeof(cldev->name));
+
+disconnect:
+	mutex_lock(&bus->device_lock);
+	if (mei_cl_disconnect(cl) < 0)
+		dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n");
+
+	mei_cl_flush_queues(cl, NULL);
+
+out:
+	mei_cl_unlink(cl);
+	mutex_unlock(&bus->device_lock);
+	mei_me_cl_put(me_cl);
+	kfree(cl);
+
+	if (ret)
+		cldev->do_match = 0;
+
+	dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match);
+}
+
+#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook }
+
+static struct mei_fixup {
+
+	const uuid_le uuid;
+	void (*hook)(struct mei_cl_device *cldev);
+} mei_fixups[] = {
+	MEI_FIXUP(MEI_UUID_ANY, number_of_connections),
+	MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist),
+	MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc),
+};
+
+/**
+ * mei_cl_bus_dev_fixup - run fixup handlers
+ *
+ * @cldev: me client device
+ */
+void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev)
+{
+	struct mei_fixup *f;
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) {
+
+		f = &mei_fixups[i];
+		if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 ||
+		    uuid_le_cmp(f->uuid, *uuid) == 0)
+			f->hook(cldev);
+	}
+}
+
diff --git a/kernel/drivers/misc/mei/bus.c b/kernel/drivers/misc/mei/bus.c
index 4cf38c398..0b05aa938 100644
--- a/kernel/drivers/misc/mei/bus.c
+++ b/kernel/drivers/misc/mei/bus.c
@@ -30,227 +30,41 @@
 #define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver)
 #define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev)
 
-static int mei_cl_device_match(struct device *dev, struct device_driver *drv)
-{
-	struct mei_cl_device *device = to_mei_cl_device(dev);
-	struct mei_cl_driver *driver = to_mei_cl_driver(drv);
-	const struct mei_cl_device_id *id;
-
-	if (!device)
-		return 0;
-
-	if (!driver || !driver->id_table)
-		return 0;
-
-	id = driver->id_table;
-
-	while (id->name[0]) {
-		if (!strncmp(dev_name(dev), id->name, sizeof(id->name)))
-			return 1;
-
-		id++;
-	}
-
-	return 0;
-}
-
-static int mei_cl_device_probe(struct device *dev)
-{
-	struct mei_cl_device *device = to_mei_cl_device(dev);
-	struct mei_cl_driver *driver;
-	struct mei_cl_device_id id;
-
-	if (!device)
-		return 0;
-
-	driver = to_mei_cl_driver(dev->driver);
-	if (!driver || !driver->probe)
-		return -ENODEV;
-
-	dev_dbg(dev, "Device probe\n");
-
-	strlcpy(id.name, dev_name(dev), sizeof(id.name));
-
-	return driver->probe(device, &id);
-}
-
-static int mei_cl_device_remove(struct device *dev)
-{
-	struct mei_cl_device *device = to_mei_cl_device(dev);
-	struct mei_cl_driver *driver;
-
-	if (!device || !dev->driver)
-		return 0;
-
-	if (device->event_cb) {
-		device->event_cb = NULL;
-		cancel_work_sync(&device->event_work);
-	}
-
-	driver = to_mei_cl_driver(dev->driver);
-	if (!driver->remove) {
-		dev->driver = NULL;
-
-		return 0;
-	}
-
-	return driver->remove(device);
-}
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
-			     char *buf)
-{
-	int len;
-
-	len = snprintf(buf, PAGE_SIZE, "mei:%s\n", dev_name(dev));
-
-	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
-}
-static DEVICE_ATTR_RO(modalias);
-
-static struct attribute *mei_cl_dev_attrs[] = {
-	&dev_attr_modalias.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(mei_cl_dev);
-
-static int mei_cl_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	if (add_uevent_var(env, "MODALIAS=mei:%s", dev_name(dev)))
-		return -ENOMEM;
-
-	return 0;
-}
-
-static struct bus_type mei_cl_bus_type = {
-	.name		= "mei",
-	.dev_groups	= mei_cl_dev_groups,
-	.match		= mei_cl_device_match,
-	.probe		= mei_cl_device_probe,
-	.remove		= mei_cl_device_remove,
-	.uevent		= mei_cl_uevent,
-};
-
-static void mei_cl_dev_release(struct device *dev)
-{
-	kfree(to_mei_cl_device(dev));
-}
-
-static struct device_type mei_cl_device_type = {
-	.release	= mei_cl_dev_release,
-};
-
-struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev,
-						uuid_le uuid)
-{
-	struct mei_cl *cl;
-
-	list_for_each_entry(cl, &dev->device_list, device_link) {
-		if (!uuid_le_cmp(uuid, cl->cl_uuid))
-			return cl;
-	}
-
-	return NULL;
-}
-struct mei_cl_device *mei_cl_add_device(struct mei_device *dev,
-					uuid_le uuid, char *name,
-					struct mei_cl_ops *ops)
-{
-	struct mei_cl_device *device;
-	struct mei_cl *cl;
-	int status;
-
-	cl = mei_cl_bus_find_cl_by_uuid(dev, uuid);
-	if (cl == NULL)
-		return NULL;
-
-	device = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL);
-	if (!device)
-		return NULL;
-
-	device->cl = cl;
-	device->ops = ops;
-
-	device->dev.parent = dev->dev;
-	device->dev.bus = &mei_cl_bus_type;
-	device->dev.type = &mei_cl_device_type;
-
-	dev_set_name(&device->dev, "%s", name);
-
-	status = device_register(&device->dev);
-	if (status) {
-		dev_err(dev->dev, "Failed to register MEI device\n");
-		kfree(device);
-		return NULL;
-	}
-
-	cl->device = device;
-
-	dev_dbg(&device->dev, "client %s registered\n", name);
-
-	return device;
-}
-EXPORT_SYMBOL_GPL(mei_cl_add_device);
-
-void mei_cl_remove_device(struct mei_cl_device *device)
-{
-	device_unregister(&device->dev);
-}
-EXPORT_SYMBOL_GPL(mei_cl_remove_device);
-
-int __mei_cl_driver_register(struct mei_cl_driver *driver, struct module *owner)
-{
-	int err;
-
-	driver->driver.name = driver->name;
-	driver->driver.owner = owner;
-	driver->driver.bus = &mei_cl_bus_type;
-
-	err = driver_register(&driver->driver);
-	if (err)
-		return err;
-
-	pr_debug("mei: driver [%s] registered\n", driver->driver.name);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__mei_cl_driver_register);
-
-void mei_cl_driver_unregister(struct mei_cl_driver *driver)
-{
-	driver_unregister(&driver->driver);
-
-	pr_debug("mei: driver [%s] unregistered\n", driver->driver.name);
-}
-EXPORT_SYMBOL_GPL(mei_cl_driver_unregister);
-
-static ssize_t ___mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+/**
+ * __mei_cl_send - internal client send (write)
+ *
+ * @cl: host client
+ * @buf: buffer to send
+ * @length: buffer length
+ * @blocking: wait for write completion
+ *
+ * Return: written size in bytes or < 0 on error
+ */
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
 			bool blocking)
 {
-	struct mei_device *dev;
-	struct mei_me_client *me_cl = NULL;
+	struct mei_device *bus;
 	struct mei_cl_cb *cb = NULL;
 	ssize_t rets;
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
 
-	dev = cl->dev;
+	bus = cl->dev;
 
-	mutex_lock(&dev->device_lock);
+	mutex_lock(&bus->device_lock);
 	if (!mei_cl_is_connected(cl)) {
 		rets = -ENODEV;
 		goto out;
 	}
 
 	/* Check if we have an ME client device */
-	me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id);
-	if (!me_cl) {
+	if (!mei_me_cl_is_active(cl->me_cl)) {
 		rets = -ENOTTY;
 		goto out;
 	}
 
-	if (length > me_cl->props.max_msg_length) {
+	if (length > mei_cl_mtu(cl)) {
 		rets = -EFBIG;
 		goto out;
 	}
@@ -266,17 +80,25 @@ static ssize_t ___mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
 	rets = mei_cl_write(cl, cb, blocking);
 
 out:
-	mei_me_cl_put(me_cl);
-	mutex_unlock(&dev->device_lock);
+	mutex_unlock(&bus->device_lock);
 	if (rets < 0)
 		mei_io_cb_free(cb);
 
 	return rets;
 }
 
+/**
+ * __mei_cl_recv - internal client receive (read)
+ *
+ * @cl: host client
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes or < 0 on error
+ */
 ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 {
-	struct mei_device *dev;
+	struct mei_device *bus;
 	struct mei_cl_cb *cb;
 	size_t r_length;
 	ssize_t rets;
@@ -284,9 +106,9 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
 
-	dev = cl->dev;
+	bus = cl->dev;
 
-	mutex_lock(&dev->device_lock);
+	mutex_lock(&bus->device_lock);
 
 	cb = mei_cl_read_cb(cl, NULL);
 	if (cb)
@@ -296,9 +118,10 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 	if (rets && rets != -EBUSY)
 		goto out;
 
+	/* wait on event only if there is no other waiter */
 	if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
 
-		mutex_unlock(&dev->device_lock);
+		mutex_unlock(&bus->device_lock);
 
 		if (wait_event_interruptible(cl->rx_wait,
 				(!list_empty(&cl->rd_completed)) ||
@@ -309,7 +132,7 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 			return -ERESTARTSYS;
 		}
 
-		mutex_lock(&dev->device_lock);
+		mutex_lock(&bus->device_lock);
 
 		if (!mei_cl_is_connected(cl)) {
 			rets = -EBUSY;
@@ -336,196 +159,833 @@ copy:
 free:
 	mei_io_cb_free(cb);
 out:
-	mutex_unlock(&dev->device_lock);
+	mutex_unlock(&bus->device_lock);
 
 	return rets;
 }
 
-inline ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length)
+/**
+ * mei_cldev_send - me device send  (write)
+ *
+ * @cldev: me client device
+ * @buf: buffer to send
+ * @length: buffer length
+ *
+ * Return: written size in bytes or < 0 on error
+ */
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 {
-	return ___mei_cl_send(cl, buf, length, 0);
-}
+	struct mei_cl *cl = cldev->cl;
 
-inline ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length)
-{
-	return ___mei_cl_send(cl, buf, length, 1);
+	if (cl == NULL)
+		return -ENODEV;
+
+	return __mei_cl_send(cl, buf, length, 1);
 }
+EXPORT_SYMBOL_GPL(mei_cldev_send);
 
-ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length)
+/**
+ * mei_cldev_recv - client receive (read)
+ *
+ * @cldev: me client device
+ * @buf: buffer to receive
+ * @length: buffer length
+ *
+ * Return: read size in bytes or < 0 on error
+ */
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
 {
-	struct mei_cl *cl = device->cl;
+	struct mei_cl *cl = cldev->cl;
 
 	if (cl == NULL)
 		return -ENODEV;
 
-	if (device->ops && device->ops->send)
-		return device->ops->send(device, buf, length);
+	return __mei_cl_recv(cl, buf, length);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_recv);
 
-	return __mei_cl_send(cl, buf, length);
+/**
+ * mei_cl_bus_event_work  - dispatch rx event for a bus device
+ *    and schedule new work
+ *
+ * @work: work
+ */
+static void mei_cl_bus_event_work(struct work_struct *work)
+{
+	struct mei_cl_device *cldev;
+
+	cldev = container_of(work, struct mei_cl_device, event_work);
+
+	if (cldev->event_cb)
+		cldev->event_cb(cldev, cldev->events, cldev->event_context);
+
+	cldev->events = 0;
+
+	/* Prepare for the next read */
+	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX))
+		mei_cl_read_start(cldev->cl, 0, NULL);
 }
-EXPORT_SYMBOL_GPL(mei_cl_send);
 
-ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length)
+/**
+ * mei_cl_bus_notify_event - schedule notify cb on bus client
+ *
+ * @cl: host client
+ */
+void mei_cl_bus_notify_event(struct mei_cl *cl)
 {
-	struct mei_cl *cl =  device->cl;
+	struct mei_cl_device *cldev = cl->cldev;
 
-	if (cl == NULL)
-		return -ENODEV;
+	if (!cldev || !cldev->event_cb)
+		return;
 
-	if (device->ops && device->ops->recv)
-		return device->ops->recv(device, buf, length);
+	if (!(cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)))
+		return;
 
-	return __mei_cl_recv(cl, buf, length);
+	if (!cl->notify_ev)
+		return;
+
+	set_bit(MEI_CL_EVENT_NOTIF, &cldev->events);
+
+	schedule_work(&cldev->event_work);
+
+	cl->notify_ev = false;
 }
-EXPORT_SYMBOL_GPL(mei_cl_recv);
 
-static void mei_bus_event_work(struct work_struct *work)
+/**
+ * mei_cl_bus_rx_event  - schedule rx event
+ *
+ * @cl: host client
+ */
+void mei_cl_bus_rx_event(struct mei_cl *cl)
 {
-	struct mei_cl_device *device;
+	struct mei_cl_device *cldev = cl->cldev;
 
-	device = container_of(work, struct mei_cl_device, event_work);
+	if (!cldev || !cldev->event_cb)
+		return;
 
-	if (device->event_cb)
-		device->event_cb(device, device->events, device->event_context);
+	if (!(cldev->events_mask & BIT(MEI_CL_EVENT_RX)))
+		return;
 
-	device->events = 0;
+	set_bit(MEI_CL_EVENT_RX, &cldev->events);
 
-	/* Prepare for the next read */
-	mei_cl_read_start(device->cl, 0, NULL);
+	schedule_work(&cldev->event_work);
 }
 
-int mei_cl_register_event_cb(struct mei_cl_device *device,
-			  mei_cl_event_cb_t event_cb, void *context)
+/**
+ * mei_cldev_register_event_cb - register event callback
+ *
+ * @cldev: me client devices
+ * @event_cb: callback function
+ * @events_mask: requested events bitmask
+ * @context: driver context data
+ *
+ * Return: 0 on success
+ *         -EALREADY if a callback is already registered
+ *         <0 on other errors
+ */
+int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
+				unsigned long events_mask,
+				mei_cldev_event_cb_t event_cb, void *context)
 {
-	if (device->event_cb)
+	int ret;
+
+	if (cldev->event_cb)
 		return -EALREADY;
 
-	device->events = 0;
-	device->event_cb = event_cb;
-	device->event_context = context;
-	INIT_WORK(&device->event_work, mei_bus_event_work);
+	cldev->events = 0;
+	cldev->events_mask = events_mask;
+	cldev->event_cb = event_cb;
+	cldev->event_context = context;
+	INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
 
-	mei_cl_read_start(device->cl, 0, NULL);
+	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
+		ret = mei_cl_read_start(cldev->cl, 0, NULL);
+		if (ret && ret != -EBUSY)
+			return ret;
+	}
+
+	if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) {
+		mutex_lock(&cldev->cl->dev->device_lock);
+		ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0);
+		mutex_unlock(&cldev->cl->dev->device_lock);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mei_cl_register_event_cb);
+EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb);
 
-void *mei_cl_get_drvdata(const struct mei_cl_device *device)
+/**
+ * mei_cldev_get_drvdata - driver data getter
+ *
+ * @cldev: mei client device
+ *
+ * Return: driver private data
+ */
+void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev)
 {
-	return dev_get_drvdata(&device->dev);
+	return dev_get_drvdata(&cldev->dev);
 }
-EXPORT_SYMBOL_GPL(mei_cl_get_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata);
 
-void mei_cl_set_drvdata(struct mei_cl_device *device, void *data)
+/**
+ * mei_cldev_set_drvdata - driver data setter
+ *
+ * @cldev: mei client device
+ * @data: data to store
+ */
+void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
 {
-	dev_set_drvdata(&device->dev, data);
+	dev_set_drvdata(&cldev->dev, data);
 }
-EXPORT_SYMBOL_GPL(mei_cl_set_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
 
-int mei_cl_enable_device(struct mei_cl_device *device)
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
 {
-	int err;
-	struct mei_device *dev;
-	struct mei_cl *cl = device->cl;
-
-	if (cl == NULL)
-		return -ENODEV;
+	return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
 
-	dev = cl->dev;
+/**
+ * mei_cldev_ver - return protocol version of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client protocol version
+ */
+u8 mei_cldev_ver(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_ver(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_ver);
 
-	mutex_lock(&dev->device_lock);
+/**
+ * mei_cldev_enabled - check whether the device is enabled
+ *
+ * @cldev: mei client device
+ *
+ * Return: true if me client is initialized and connected
+ */
+bool mei_cldev_enabled(struct mei_cl_device *cldev)
+{
+	return cldev->cl && mei_cl_is_connected(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enabled);
 
-	err = mei_cl_connect(cl, NULL);
-	if (err < 0) {
-		mutex_unlock(&dev->device_lock);
-		dev_err(dev->dev, "Could not connect to the ME client");
+/**
+ * mei_cldev_enable - enable me client device
+ *     create connection with me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_enable(struct mei_cl_device *cldev)
+{
+	struct mei_device *bus = cldev->bus;
+	struct mei_cl *cl;
+	int ret;
+
+	cl = cldev->cl;
+
+	if (!cl) {
+		mutex_lock(&bus->device_lock);
+		cl = mei_cl_alloc_linked(bus, MEI_HOST_CLIENT_ID_ANY);
+		mutex_unlock(&bus->device_lock);
+		if (IS_ERR(cl))
+			return PTR_ERR(cl);
+		/* update pointers */
+		cldev->cl = cl;
+		cl->cldev = cldev;
+	}
 
-		return err;
+	mutex_lock(&bus->device_lock);
+	if (mei_cl_is_connected(cl)) {
+		ret = 0;
+		goto out;
 	}
 
-	mutex_unlock(&dev->device_lock);
+	if (!mei_me_cl_is_active(cldev->me_cl)) {
+		dev_err(&cldev->dev, "me client is not active\n");
+		ret = -ENOTTY;
+		goto out;
+	}
 
-	if (device->event_cb)
-		mei_cl_read_start(device->cl, 0, NULL);
+	ret = mei_cl_connect(cl, cldev->me_cl, NULL);
+	if (ret < 0)
+		dev_err(&cldev->dev, "cannot connect\n");
 
-	if (!device->ops || !device->ops->enable)
-		return 0;
+out:
+	mutex_unlock(&bus->device_lock);
 
-	return device->ops->enable(device);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(mei_cl_enable_device);
+EXPORT_SYMBOL_GPL(mei_cldev_enable);
 
-int mei_cl_disable_device(struct mei_cl_device *device)
+/**
+ * mei_cldev_disable - disable me client device
+ *     disconnect from the me client
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success and < 0 on error
+ */
+int mei_cldev_disable(struct mei_cl_device *cldev)
 {
+	struct mei_device *bus;
+	struct mei_cl *cl;
 	int err;
-	struct mei_device *dev;
-	struct mei_cl *cl = device->cl;
 
-	if (cl == NULL)
+	if (!cldev || !cldev->cl)
 		return -ENODEV;
 
-	dev = cl->dev;
+	cl = cldev->cl;
 
-	if (device->ops && device->ops->disable)
-		device->ops->disable(device);
+	bus = cldev->bus;
 
-	device->event_cb = NULL;
+	cldev->event_cb = NULL;
 
-	mutex_lock(&dev->device_lock);
+	mutex_lock(&bus->device_lock);
 
 	if (!mei_cl_is_connected(cl)) {
-		dev_err(dev->dev, "Already disconnected");
+		dev_err(bus->dev, "Already disconnected");
 		err = 0;
 		goto out;
 	}
 
-	cl->state = MEI_FILE_DISCONNECTING;
-
 	err = mei_cl_disconnect(cl);
-	if (err < 0) {
-		dev_err(dev->dev, "Could not disconnect from the ME client");
-		goto out;
-	}
+	if (err < 0)
+		dev_err(bus->dev, "Could not disconnect from the ME client");
 
+out:
 	/* Flush queues and remove any pending read */
 	mei_cl_flush_queues(cl, NULL);
+	mei_cl_unlink(cl);
 
-out:
-	mutex_unlock(&dev->device_lock);
+	kfree(cl);
+	cldev->cl = NULL;
+
+	mutex_unlock(&bus->device_lock);
 	return err;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_disable);
 
+/**
+ * mei_cl_device_find - find matching entry in the driver id table
+ *
+ * @cldev: me client device
+ * @cldrv: me client driver
+ *
+ * Return: id on success; NULL if no id is matching
+ */
+static const
+struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
+					    struct mei_cl_driver *cldrv)
+{
+	const struct mei_cl_device_id *id;
+	const uuid_le *uuid;
+	u8 version;
+	bool match;
+
+	uuid = mei_me_cl_uuid(cldev->me_cl);
+	version = mei_me_cl_ver(cldev->me_cl);
+
+	id = cldrv->id_table;
+	while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) {
+		if (!uuid_le_cmp(*uuid, id->uuid)) {
+			match = true;
+
+			if (cldev->name[0])
+				if (strncmp(cldev->name, id->name,
+					    sizeof(id->name)))
+					match = false;
+
+			if (id->version != MEI_CL_VERSION_ANY)
+				if (id->version != version)
+					match = false;
+			if (match)
+				return id;
+		}
+
+		id++;
+	}
+
+	return NULL;
 }
-EXPORT_SYMBOL_GPL(mei_cl_disable_device);
 
-void mei_cl_bus_rx_event(struct mei_cl *cl)
+/**
+ * mei_cl_device_match  - device match function
+ *
+ * @dev: device
+ * @drv: driver
+ *
+ * Return:  1 if matching device was found 0 otherwise
+ */
+static int mei_cl_device_match(struct device *dev, struct device_driver *drv)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	struct mei_cl_driver *cldrv = to_mei_cl_driver(drv);
+	const struct mei_cl_device_id *found_id;
+
+	if (!cldev)
+		return 0;
+
+	if (!cldev->do_match)
+		return 0;
+
+	if (!cldrv || !cldrv->id_table)
+		return 0;
+
+	found_id = mei_cl_device_find(cldev, cldrv);
+	if (found_id)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * mei_cl_device_probe - bus probe function
+ *
+ * @dev: device
+ *
+ * Return:  0 on success; < 0 otherwise
+ */
+static int mei_cl_device_probe(struct device *dev)
+{
+	struct mei_cl_device *cldev;
+	struct mei_cl_driver *cldrv;
+	const struct mei_cl_device_id *id;
+
+	cldev = to_mei_cl_device(dev);
+	cldrv = to_mei_cl_driver(dev->driver);
+
+	if (!cldev)
+		return 0;
+
+	if (!cldrv || !cldrv->probe)
+		return -ENODEV;
+
+	id = mei_cl_device_find(cldev, cldrv);
+	if (!id)
+		return -ENODEV;
+
+	__module_get(THIS_MODULE);
+
+	return cldrv->probe(cldev, id);
+}
+
+/**
+ * mei_cl_device_remove - remove device from the bus
+ *
+ * @dev: device
+ *
+ * Return:  0 on success; < 0 otherwise
+ */
+static int mei_cl_device_remove(struct device *dev)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	struct mei_cl_driver *cldrv;
+	int ret = 0;
+
+	if (!cldev || !dev->driver)
+		return 0;
+
+	if (cldev->event_cb) {
+		cldev->event_cb = NULL;
+		cancel_work_sync(&cldev->event_work);
+	}
+
+	cldrv = to_mei_cl_driver(dev->driver);
+	if (cldrv->remove)
+		ret = cldrv->remove(cldev);
+
+	module_put(THIS_MODULE);
+	dev->driver = NULL;
+	return ret;
+
+}
+
+static ssize_t name_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
 {
-	struct mei_cl_device *device = cl->device;
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	size_t len;
 
-	if (!device || !device->event_cb)
+	len = snprintf(buf, PAGE_SIZE, "%s", cldev->name);
+
+	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	size_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "%pUl", uuid);
+
+	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(uuid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+	size_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "%02X", version);
+
+	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	size_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
+	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *mei_cldev_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_uuid.attr,
+	&dev_attr_version.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(mei_cldev);
+
+/**
+ * mei_cl_device_uevent - me client bus uevent handler
+ *
+ * @dev: device
+ * @env: uevent kobject
+ *
+ * Return: 0 on success, -ENOMEM when add_uevent_var fails
+ */
+static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+
+	if (add_uevent_var(env, "MEI_CL_VERSION=%d", version))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:",
+			   cldev->name, uuid, version))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct bus_type mei_cl_bus_type = {
+	.name		= "mei",
+	.dev_groups	= mei_cldev_groups,
+	.match		= mei_cl_device_match,
+	.probe		= mei_cl_device_probe,
+	.remove		= mei_cl_device_remove,
+	.uevent		= mei_cl_device_uevent,
+};
+
+static struct mei_device *mei_dev_bus_get(struct mei_device *bus)
+{
+	if (bus)
+		get_device(bus->dev);
+
+	return bus;
+}
+
+static void mei_dev_bus_put(struct mei_device *bus)
+{
+	if (bus)
+		put_device(bus->dev);
+}
+
+static void mei_cl_bus_dev_release(struct device *dev)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+
+	if (!cldev)
+		return;
+
+	mei_me_cl_put(cldev->me_cl);
+	mei_dev_bus_put(cldev->bus);
+	kfree(cldev);
+}
+
+static struct device_type mei_cl_device_type = {
+	.release	= mei_cl_bus_dev_release,
+};
+
+/**
+ * mei_cl_bus_set_name - set device name for me client device
+ *
+ * @cldev: me client device
+ */
+static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
+{
+	dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X",
+		     cldev->name,
+		     mei_me_cl_uuid(cldev->me_cl),
+		     mei_me_cl_ver(cldev->me_cl));
+}
+
+/**
+ * mei_cl_bus_dev_alloc - initialize and allocate mei client device
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Return: allocated device structure or NULL on allocation failure
+ */
+static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus,
+						  struct mei_me_client *me_cl)
+{
+	struct mei_cl_device *cldev;
+
+	cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL);
+	if (!cldev)
+		return NULL;
+
+	device_initialize(&cldev->dev);
+	cldev->dev.parent = bus->dev;
+	cldev->dev.bus    = &mei_cl_bus_type;
+	cldev->dev.type   = &mei_cl_device_type;
+	cldev->bus        = mei_dev_bus_get(bus);
+	cldev->me_cl      = mei_me_cl_get(me_cl);
+	mei_cl_bus_set_name(cldev);
+	cldev->is_added   = 0;
+	INIT_LIST_HEAD(&cldev->bus_list);
+
+	return cldev;
+}
+
+/**
+ * mei_cl_bus_dev_setup - setup me client device
+ *    run fix up routines and set the device name
+ *
+ * @bus: mei device
+ * @cldev: me client device
+ *
+ * Return: true if the device is eligible for enumeration
+ */
+static bool mei_cl_bus_dev_setup(struct mei_device *bus,
+				 struct mei_cl_device *cldev)
+{
+	cldev->do_match = 1;
+	mei_cl_bus_dev_fixup(cldev);
+
+	/* the device name can change during fix up */
+	if (cldev->do_match)
+		mei_cl_bus_set_name(cldev);
+
+	return cldev->do_match == 1;
+}
+
+/**
+ * mei_cl_bus_dev_add - add me client devices
+ *
+ * @cldev: me client device
+ *
+ * Return: 0 on success; < 0 on failure
+ */
+static int mei_cl_bus_dev_add(struct mei_cl_device *cldev)
+{
+	int ret;
+
+	dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n",
+		mei_me_cl_uuid(cldev->me_cl),
+		mei_me_cl_ver(cldev->me_cl));
+	ret = device_add(&cldev->dev);
+	if (!ret)
+		cldev->is_added = 1;
+
+	return ret;
+}
+
+/**
+ * mei_cl_bus_dev_stop - stop the driver
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev)
+{
+	if (cldev->is_added)
+		device_release_driver(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_dev_destroy - destroy me client devices object
+ *
+ * @cldev: me client device
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev)
+{
+
+	WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock));
+
+	if (!cldev->is_added)
+		return;
+
+	device_del(&cldev->dev);
+
+	list_del_init(&cldev->bus_list);
+
+	cldev->is_added = 0;
+	put_device(&cldev->dev);
+}
+
+/**
+ * mei_cl_bus_remove_device - remove a device from the bus
+ *
+ * @cldev: me client device
+ */
+static void mei_cl_bus_remove_device(struct mei_cl_device *cldev)
+{
+	mei_cl_bus_dev_stop(cldev);
+	mei_cl_bus_dev_destroy(cldev);
+}
+
+/**
+ * mei_cl_bus_remove_devices - remove all devices from the bus
+ *
+ * @bus: mei device
+ */
+void mei_cl_bus_remove_devices(struct mei_device *bus)
+{
+	struct mei_cl_device *cldev, *next;
+
+	mutex_lock(&bus->cl_bus_lock);
+	list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list)
+		mei_cl_bus_remove_device(cldev);
+	mutex_unlock(&bus->cl_bus_lock);
+}
+
+
+/**
+ * mei_cl_bus_dev_init - allocate and initialize an mei client device
+ *     based on me client
+ *
+ * @bus: mei device
+ * @me_cl: me client
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
+ */
+static void mei_cl_bus_dev_init(struct mei_device *bus,
+				struct mei_me_client *me_cl)
+{
+	struct mei_cl_device *cldev;
+
+	WARN_ON(!mutex_is_locked(&bus->cl_bus_lock));
+
+	dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl));
+
+	if (me_cl->bus_added)
 		return;
 
-	set_bit(MEI_CL_EVENT_RX, &device->events);
+	cldev = mei_cl_bus_dev_alloc(bus, me_cl);
+	if (!cldev)
+		return;
+
+	me_cl->bus_added = true;
+	list_add_tail(&cldev->bus_list, &bus->device_list);
 
-	schedule_work(&device->event_work);
 }
 
-void mei_cl_bus_remove_devices(struct mei_device *dev)
+/**
+ * mei_cl_bus_rescan - scan me clients list and create
+ *    devices for eligible clients
+ *
+ * @bus: mei device
+ */
+void mei_cl_bus_rescan(struct mei_device *bus)
 {
-	struct mei_cl *cl, *next;
+	struct mei_cl_device *cldev, *n;
+	struct mei_me_client *me_cl;
+
+	mutex_lock(&bus->cl_bus_lock);
+
+	down_read(&bus->me_clients_rwsem);
+	list_for_each_entry(me_cl, &bus->me_clients, list)
+		mei_cl_bus_dev_init(bus, me_cl);
+	up_read(&bus->me_clients_rwsem);
+
+	list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) {
 
-	mutex_lock(&dev->device_lock);
-	list_for_each_entry_safe(cl, next, &dev->device_list, device_link) {
-		if (cl->device)
-			mei_cl_remove_device(cl->device);
+		if (!mei_me_cl_is_active(cldev->me_cl)) {
+			mei_cl_bus_remove_device(cldev);
+			continue;
+		}
+
+		if (cldev->is_added)
+			continue;
 
-		list_del(&cl->device_link);
-		mei_cl_unlink(cl);
-		kfree(cl);
+		if (mei_cl_bus_dev_setup(bus, cldev))
+			mei_cl_bus_dev_add(cldev);
+		else {
+			list_del_init(&cldev->bus_list);
+			put_device(&cldev->dev);
+		}
 	}
-	mutex_unlock(&dev->device_lock);
+	mutex_unlock(&bus->cl_bus_lock);
+
+	dev_dbg(bus->dev, "rescan end");
 }
 
+int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
+				struct module *owner)
+{
+	int err;
+
+	cldrv->driver.name = cldrv->name;
+	cldrv->driver.owner = owner;
+	cldrv->driver.bus = &mei_cl_bus_type;
+
+	err = driver_register(&cldrv->driver);
+	if (err)
+		return err;
+
+	pr_debug("mei: driver [%s] registered\n", cldrv->driver.name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__mei_cldev_driver_register);
+
+void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv)
+{
+	driver_unregister(&cldrv->driver);
+
+	pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister);
+
+
 int __init mei_cl_bus_init(void)
 {
 	return bus_register(&mei_cl_bus_type);
diff --git a/kernel/drivers/misc/mei/client.c b/kernel/drivers/misc/mei/client.c
index b2b9f4382..a6c87c713 100644
--- a/kernel/drivers/misc/mei/client.c
+++ b/kernel/drivers/misc/mei/client.c
@@ -83,7 +83,7 @@ void mei_me_cl_put(struct mei_me_client *me_cl)
 }
 
 /**
- * __mei_me_cl_del  - delete me client form the list and decrease
+ * __mei_me_cl_del  - delete me client from the list and decrease
  *     reference counter
  *
  * @dev: mei device
@@ -96,11 +96,25 @@ static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
 	if (!me_cl)
 		return;
 
-	list_del(&me_cl->list);
+	list_del_init(&me_cl->list);
 	mei_me_cl_put(me_cl);
 }
 
 /**
+ * mei_me_cl_del - delete me client from the list and decrease
+ *     reference counter
+ *
+ * @dev: mei device
+ * @me_cl: me client
+ */
+void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl)
+{
+	down_write(&dev->me_clients_rwsem);
+	__mei_me_cl_del(dev, me_cl);
+	up_write(&dev->me_clients_rwsem);
+}
+
+/**
  * mei_me_cl_add - add me client to the list
  *
  * @dev: mei device
@@ -317,7 +331,7 @@ static inline bool mei_cl_cmp_id(const struct mei_cl *cl1,
 {
 	return cl1 && cl2 &&
 		(cl1->host_client_id == cl2->host_client_id) &&
-		(cl1->me_client_id == cl2->me_client_id);
+		(mei_cl_me_id(cl1) == mei_cl_me_id(cl2));
 }
 
 /**
@@ -541,11 +555,12 @@ void mei_cl_init(struct mei_cl *cl, struct mei_device *dev)
 	init_waitqueue_head(&cl->wait);
 	init_waitqueue_head(&cl->rx_wait);
 	init_waitqueue_head(&cl->tx_wait);
+	init_waitqueue_head(&cl->ev_wait);
 	INIT_LIST_HEAD(&cl->rd_completed);
 	INIT_LIST_HEAD(&cl->rd_pending);
 	INIT_LIST_HEAD(&cl->link);
-	INIT_LIST_HEAD(&cl->device_link);
 	cl->writing_state = MEI_IDLE;
+	cl->state = MEI_FILE_INITIALIZING;
 	cl->dev = dev;
 }
 
@@ -619,7 +634,7 @@ int mei_cl_link(struct mei_cl *cl, int id)
 }
 
 /**
- * mei_cl_unlink - remove me_cl from the list
+ * mei_cl_unlink - remove host client from the list
  *
  * @cl: host client
  *
@@ -667,24 +682,20 @@ void mei_host_client_init(struct work_struct *work)
 
 	me_cl = mei_me_cl_by_uuid(dev, &mei_amthif_guid);
 	if (me_cl)
-		mei_amthif_host_init(dev);
+		mei_amthif_host_init(dev, me_cl);
 	mei_me_cl_put(me_cl);
 
 	me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid);
 	if (me_cl)
-		mei_wd_host_init(dev);
+		mei_wd_host_init(dev, me_cl);
 	mei_me_cl_put(me_cl);
 
-	me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid);
-	if (me_cl)
-		mei_nfc_host_init(dev);
-	mei_me_cl_put(me_cl);
-
-
 	dev->dev_state = MEI_DEV_ENABLED;
 	dev->reset_count = 0;
 	mutex_unlock(&dev->device_lock);
 
+	mei_cl_bus_rescan(dev);
+
 	pm_runtime_mark_last_busy(dev->dev);
 	dev_dbg(dev->dev, "rpm: autosuspend\n");
 	pm_runtime_autosuspend(dev->dev);
@@ -715,6 +726,173 @@ bool mei_hbuf_acquire(struct mei_device *dev)
 }
 
 /**
+ * mei_cl_set_disconnected - set disconnected state and clear
+ *   associated states and resources
+ *
+ * @cl: host client
+ */
+void mei_cl_set_disconnected(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+
+	if (cl->state == MEI_FILE_DISCONNECTED ||
+	    cl->state == MEI_FILE_INITIALIZING)
+		return;
+
+	cl->state = MEI_FILE_DISCONNECTED;
+	mei_io_list_flush(&dev->ctrl_rd_list, cl);
+	mei_io_list_flush(&dev->ctrl_wr_list, cl);
+	cl->mei_flow_ctrl_creds = 0;
+	cl->timer_count = 0;
+
+	if (!cl->me_cl)
+		return;
+
+	if (!WARN_ON(cl->me_cl->connect_count == 0))
+		cl->me_cl->connect_count--;
+
+	if (cl->me_cl->connect_count == 0)
+		cl->me_cl->mei_flow_ctrl_creds = 0;
+
+	mei_me_cl_put(cl->me_cl);
+	cl->me_cl = NULL;
+}
+
+static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl)
+{
+	if (!mei_me_cl_get(me_cl))
+		return -ENOENT;
+
+	/* only one connection is allowed for fixed address clients */
+	if (me_cl->props.fixed_address) {
+		if (me_cl->connect_count) {
+			mei_me_cl_put(me_cl);
+			return -EBUSY;
+		}
+	}
+
+	cl->me_cl = me_cl;
+	cl->state = MEI_FILE_CONNECTING;
+	cl->me_cl->connect_count++;
+
+	return 0;
+}
+
+/**
+ * mei_cl_send_disconnect - send disconnect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev;
+	int ret;
+
+	dev = cl->dev;
+
+	ret = mei_hbm_cl_disconnect_req(dev, cl);
+	cl->status = ret;
+	if (ret) {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+	cl->timer_count = MEI_CONNECT_TIMEOUT;
+
+	return 0;
+}
+
+/**
+ * mei_cl_irq_disconnect - processes close related operation from
+ *	interrupt thread context - send disconnect request
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			    struct mei_cl_cb *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+
+	msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+
+	if (slots < msg_slots)
+		return -EMSGSIZE;
+
+	ret = mei_cl_send_disconnect(cl, cb);
+	if (ret)
+		list_move_tail(&cb->list, &cmpl_list->list);
+
+	return ret;
+}
+
+/**
+ * __mei_cl_disconnect - disconnect host client from the me one
+ *     internal function runtime pm has to be already acquired
+ *
+ * @cl: host client
+ *
+ * Return: 0 on success, <0 on failure.
+ */
+static int __mei_cl_disconnect(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+	struct mei_cl_cb *cb;
+	int rets;
+
+	dev = cl->dev;
+
+	cl->state = MEI_FILE_DISCONNECTING;
+
+	cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL);
+	rets = cb ? 0 : -ENOMEM;
+	if (rets)
+		goto out;
+
+	cl_dbg(dev, cl, "add disconnect cb to control write list\n");
+	list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+
+	if (mei_hbuf_acquire(dev)) {
+		rets = mei_cl_send_disconnect(cl, cb);
+		if (rets) {
+			cl_err(dev, cl, "failed to disconnect.\n");
+			goto out;
+		}
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait, cl->state == MEI_FILE_DISCONNECT_REPLY,
+			   mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	rets = cl->status;
+	if (cl->state != MEI_FILE_DISCONNECT_REPLY) {
+		cl_dbg(dev, cl, "timeout on disconnect from FW client.\n");
+		rets = -ETIME;
+	}
+
+out:
+	/* we disconnect also on error */
+	mei_cl_set_disconnected(cl);
+	if (!rets)
+		cl_dbg(dev, cl, "successfully disconnected from FW client.\n");
+
+	mei_io_cb_free(cb);
+	return rets;
+}
+
+/**
  * mei_cl_disconnect - disconnect host client from the me one
  *
  * @cl: host client
@@ -726,7 +904,6 @@ bool mei_hbuf_acquire(struct mei_device *dev)
 int mei_cl_disconnect(struct mei_cl *cl)
 {
 	struct mei_device *dev;
-	struct mei_cl_cb *cb;
 	int rets;
 
 	if (WARN_ON(!cl || !cl->dev))
@@ -736,9 +913,14 @@ int mei_cl_disconnect(struct mei_cl *cl)
 
 	cl_dbg(dev, cl, "disconnecting");
 
-	if (cl->state != MEI_FILE_DISCONNECTING)
+	if (!mei_cl_is_connected(cl))
 		return 0;
 
+	if (mei_cl_is_fixed_address(cl)) {
+		mei_cl_set_disconnected(cl);
+		return 0;
+	}
+
 	rets = pm_runtime_get(dev->dev);
 	if (rets < 0 && rets != -EINPROGRESS) {
 		pm_runtime_put_noidle(dev->dev);
@@ -746,49 +928,12 @@ int mei_cl_disconnect(struct mei_cl *cl)
 		return rets;
 	}
 
-	cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL);
-	rets = cb ? 0 : -ENOMEM;
-	if (rets)
-		goto free;
+	rets = __mei_cl_disconnect(cl);
 
-	if (mei_hbuf_acquire(dev)) {
-		if (mei_hbm_cl_disconnect_req(dev, cl)) {
-			rets = -ENODEV;
-			cl_err(dev, cl, "failed to disconnect.\n");
-			goto free;
-		}
-		cl->timer_count = MEI_CONNECT_TIMEOUT;
-		mdelay(10); /* Wait for hardware disconnection ready */
-		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
-	} else {
-		cl_dbg(dev, cl, "add disconnect cb to control write list\n");
-		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
-
-	}
-	mutex_unlock(&dev->device_lock);
-
-	wait_event_timeout(cl->wait,
-			MEI_FILE_DISCONNECTED == cl->state,
-			mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
-
-	mutex_lock(&dev->device_lock);
-
-	if (MEI_FILE_DISCONNECTED == cl->state) {
-		rets = 0;
-		cl_dbg(dev, cl, "successfully disconnected from FW client.\n");
-	} else {
-		cl_dbg(dev, cl, "timeout on disconnect from FW client.\n");
-		rets = -ETIME;
-	}
-
-	mei_io_list_flush(&dev->ctrl_rd_list, cl);
-	mei_io_list_flush(&dev->ctrl_wr_list, cl);
-free:
 	cl_dbg(dev, cl, "rpm: autosuspend\n");
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 
-	mei_io_cb_free(cb);
 	return rets;
 }
 
@@ -801,53 +946,119 @@ free:
  *
  * Return: true if other client is connected, false - otherwise.
  */
-bool mei_cl_is_other_connecting(struct mei_cl *cl)
+static bool mei_cl_is_other_connecting(struct mei_cl *cl)
 {
 	struct mei_device *dev;
-	struct mei_cl *ocl; /* the other client */
-
-	if (WARN_ON(!cl || !cl->dev))
-		return false;
+	struct mei_cl_cb *cb;
 
 	dev = cl->dev;
 
-	list_for_each_entry(ocl, &dev->file_list, link) {
-		if (ocl->state == MEI_FILE_CONNECTING &&
-		    ocl != cl &&
-		    cl->me_client_id == ocl->me_client_id)
+	list_for_each_entry(cb, &dev->ctrl_rd_list.list, list) {
+		if (cb->fop_type == MEI_FOP_CONNECT &&
+		    mei_cl_me_id(cl) == mei_cl_me_id(cb->cl))
 			return true;
-
 	}
 
 	return false;
 }
 
 /**
+ * mei_cl_send_connect - send connect request
+ *
+ * @cl: host client
+ * @cb: callback block
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb)
+{
+	struct mei_device *dev;
+	int ret;
+
+	dev = cl->dev;
+
+	ret = mei_hbm_cl_connect_req(dev, cl);
+	cl->status = ret;
+	if (ret) {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+	cl->timer_count = MEI_CONNECT_TIMEOUT;
+	return 0;
+}
+
+/**
+ * mei_cl_irq_connect - send connect request in irq_thread context
+ *
+ * @cl: host client
+ * @cb: callback block
+ * @cmpl_list: complete list
+ *
+ * Return: 0, OK; otherwise, error.
+ */
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			      struct mei_cl_cb *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int rets;
+
+	msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+
+	if (mei_cl_is_other_connecting(cl))
+		return 0;
+
+	if (slots < msg_slots)
+		return -EMSGSIZE;
+
+	rets = mei_cl_send_connect(cl, cb);
+	if (rets)
+		list_move_tail(&cb->list, &cmpl_list->list);
+
+	return rets;
+}
+
+/**
  * mei_cl_connect - connect host client to the me one
  *
  * @cl: host client
+ * @me_cl: me client
  * @file: pointer to file structure
  *
  * Locking: called under "dev->device_lock" lock
  *
  * Return: 0 on success, <0 on failure.
  */
-int mei_cl_connect(struct mei_cl *cl, struct file *file)
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+		   struct file *file)
 {
 	struct mei_device *dev;
 	struct mei_cl_cb *cb;
 	int rets;
 
-	if (WARN_ON(!cl || !cl->dev))
+	if (WARN_ON(!cl || !cl->dev || !me_cl))
 		return -ENODEV;
 
 	dev = cl->dev;
 
+	rets = mei_cl_set_connecting(cl, me_cl);
+	if (rets)
+		return rets;
+
+	if (mei_cl_is_fixed_address(cl)) {
+		cl->state = MEI_FILE_CONNECTED;
+		return 0;
+	}
+
 	rets = pm_runtime_get(dev->dev);
 	if (rets < 0 && rets != -EINPROGRESS) {
 		pm_runtime_put_noidle(dev->dev);
 		cl_err(dev, cl, "rpm: get failed %d\n", rets);
-		return rets;
+		goto nortpm;
 	}
 
 	cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file);
@@ -855,45 +1066,52 @@ int mei_cl_connect(struct mei_cl *cl, struct file *file)
 	if (rets)
 		goto out;
 
+	list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+
 	/* run hbuf acquire last so we don't have to undo */
 	if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) {
-		cl->state = MEI_FILE_CONNECTING;
-		if (mei_hbm_cl_connect_req(dev, cl)) {
-			rets = -ENODEV;
+		rets = mei_cl_send_connect(cl, cb);
+		if (rets)
 			goto out;
-		}
-		cl->timer_count = MEI_CONNECT_TIMEOUT;
-		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
-	} else {
-		cl->state = MEI_FILE_INITIALIZING;
-		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
 	}
 
 	mutex_unlock(&dev->device_lock);
 	wait_event_timeout(cl->wait,
 			(cl->state == MEI_FILE_CONNECTED ||
-			 cl->state == MEI_FILE_DISCONNECTED),
+			 cl->state == MEI_FILE_DISCONNECT_REQUIRED ||
+			 cl->state == MEI_FILE_DISCONNECT_REPLY),
 			mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
 	mutex_lock(&dev->device_lock);
 
 	if (!mei_cl_is_connected(cl)) {
-		cl->state = MEI_FILE_DISCONNECTED;
-		/* something went really wrong */
+		if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) {
+			mei_io_list_flush(&dev->ctrl_rd_list, cl);
+			mei_io_list_flush(&dev->ctrl_wr_list, cl);
+			 /* ignore disconnect return value;
+			  * in case of failure reset will be invoked
+			  */
+			__mei_cl_disconnect(cl);
+			rets = -EFAULT;
+			goto out;
+		}
+
+		/* timeout or something went really wrong */
 		if (!cl->status)
 			cl->status = -EFAULT;
-
-		mei_io_list_flush(&dev->ctrl_rd_list, cl);
-		mei_io_list_flush(&dev->ctrl_wr_list, cl);
 	}
 
 	rets = cl->status;
-
 out:
 	cl_dbg(dev, cl, "rpm: autosuspend\n");
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 
 	mei_io_cb_free(cb);
+
+nortpm:
+	if (!mei_cl_is_connected(cl))
+		mei_cl_set_disconnected(cl);
+
 	return rets;
 }
 
@@ -934,36 +1152,29 @@ err:
  * @cl: private data of the file object
  *
  * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise.
- *	-ENOENT if mei_cl is not present
- *	-EINVAL if single_recv_buf == 0
  */
 int mei_cl_flow_ctrl_creds(struct mei_cl *cl)
 {
-	struct mei_device *dev;
-	struct mei_me_client *me_cl;
-	int rets = 0;
+	int rets;
 
-	if (WARN_ON(!cl || !cl->dev))
+	if (WARN_ON(!cl || !cl->me_cl))
 		return -EINVAL;
 
-	dev = cl->dev;
-
 	if (cl->mei_flow_ctrl_creds > 0)
 		return 1;
 
-	me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id);
-	if (!me_cl) {
-		cl_err(dev, cl, "no such me client %d\n", cl->me_client_id);
-		return -ENOENT;
+	if (mei_cl_is_fixed_address(cl)) {
+		rets = mei_cl_read_start(cl, mei_cl_mtu(cl), NULL);
+		if (rets && rets != -EBUSY)
+			return rets;
+		return 1;
 	}
 
-	if (me_cl->mei_flow_ctrl_creds > 0) {
-		rets = 1;
-		if (WARN_ON(me_cl->props.single_recv_buf == 0))
-			rets = -EINVAL;
+	if (mei_cl_is_single_recv_buf(cl)) {
+		if (cl->me_cl->mei_flow_ctrl_creds > 0)
+			return 1;
 	}
-	mei_me_cl_put(me_cl);
-	return rets;
+	return 0;
 }
 
 /**
@@ -973,46 +1184,244 @@ int mei_cl_flow_ctrl_creds(struct mei_cl *cl)
  *
  * Return:
  *	0 on success
- *	-ENOENT when me client is not found
  *	-EINVAL when ctrl credits are <= 0
  */
 int mei_cl_flow_ctrl_reduce(struct mei_cl *cl)
 {
+	if (WARN_ON(!cl || !cl->me_cl))
+		return -EINVAL;
+
+	if (mei_cl_is_fixed_address(cl))
+		return 0;
+
+	if (mei_cl_is_single_recv_buf(cl)) {
+		if (WARN_ON(cl->me_cl->mei_flow_ctrl_creds <= 0))
+			return -EINVAL;
+		cl->me_cl->mei_flow_ctrl_creds--;
+	} else {
+		if (WARN_ON(cl->mei_flow_ctrl_creds <= 0))
+			return -EINVAL;
+		cl->mei_flow_ctrl_creds--;
+	}
+	return 0;
+}
+
+/**
+ *  mei_cl_notify_fop2req - convert fop to proper request
+ *
+ * @fop: notification file operation type
+ *
+ * Return:  MEI_HBM_NOTIFICATION_START/STOP
+ */
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop)
+{
+	if (fop == MEI_FOP_NOTIFY_START)
+		return MEI_HBM_NOTIFICATION_START;
+	else
+		return MEI_HBM_NOTIFICATION_STOP;
+}
+
+/**
+ *  mei_cl_notify_req2fop - convert notification request to file operation type
+ *
+ * @req: hbm notification request type
+ *
+ * Return:  MEI_FOP_NOTIFY_START/STOP
+ */
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req)
+{
+	if (req == MEI_HBM_NOTIFICATION_START)
+		return MEI_FOP_NOTIFY_START;
+	else
+		return MEI_FOP_NOTIFY_STOP;
+}
+
+/**
+ * mei_cl_irq_notify - send notification request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+		      struct mei_cl_cb *cmpl_list)
+{
+	struct mei_device *dev = cl->dev;
+	u32 msg_slots;
+	int slots;
+	int ret;
+	bool request;
+
+	msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
+	slots = mei_hbuf_empty_slots(dev);
+
+	if (slots < msg_slots)
+		return -EMSGSIZE;
+
+	request = mei_cl_notify_fop2req(cb->fop_type);
+	ret = mei_hbm_cl_notify_req(dev, cl, request);
+	if (ret) {
+		cl->status = ret;
+		list_move_tail(&cb->list, &cmpl_list->list);
+		return ret;
+	}
+
+	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
+	return 0;
+}
+
+/**
+ * mei_cl_notify_request - send notification stop/start request
+ *
+ * @cl: host client
+ * @file: associate request with file
+ * @request: 1 for start or 0 for stop
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request)
+{
 	struct mei_device *dev;
-	struct mei_me_client *me_cl;
+	struct mei_cl_cb *cb;
+	enum mei_cb_file_ops fop_type;
 	int rets;
 
 	if (WARN_ON(!cl || !cl->dev))
-		return -EINVAL;
+		return -ENODEV;
 
 	dev = cl->dev;
 
-	me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id);
-	if (!me_cl) {
-		cl_err(dev, cl, "no such me client %d\n", cl->me_client_id);
-		return -ENOENT;
+	if (!dev->hbm_f_ev_supported) {
+		cl_dbg(dev, cl, "notifications not supported\n");
+		return -EOPNOTSUPP;
 	}
 
-	if (me_cl->props.single_recv_buf) {
-		if (WARN_ON(me_cl->mei_flow_ctrl_creds <= 0)) {
-			rets = -EINVAL;
+	rets = pm_runtime_get(dev->dev);
+	if (rets < 0 && rets != -EINPROGRESS) {
+		pm_runtime_put_noidle(dev->dev);
+		cl_err(dev, cl, "rpm: get failed %d\n", rets);
+		return rets;
+	}
+
+	fop_type = mei_cl_notify_req2fop(request);
+	cb = mei_io_cb_init(cl, fop_type, file);
+	if (!cb) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	if (mei_hbuf_acquire(dev)) {
+		if (mei_hbm_cl_notify_req(dev, cl, request)) {
+			rets = -ENODEV;
 			goto out;
 		}
-		me_cl->mei_flow_ctrl_creds--;
+		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
 	} else {
-		if (WARN_ON(cl->mei_flow_ctrl_creds <= 0)) {
-			rets = -EINVAL;
-			goto out;
-		}
-		cl->mei_flow_ctrl_creds--;
+		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(cl->wait, cl->notify_en == request,
+			mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+	mutex_lock(&dev->device_lock);
+
+	if (cl->notify_en != request) {
+		mei_io_list_flush(&dev->ctrl_rd_list, cl);
+		mei_io_list_flush(&dev->ctrl_wr_list, cl);
+		if (!cl->status)
+			cl->status = -EFAULT;
 	}
-	rets = 0;
+
+	rets = cl->status;
+
 out:
-	mei_me_cl_put(me_cl);
+	cl_dbg(dev, cl, "rpm: autosuspend\n");
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	mei_io_cb_free(cb);
 	return rets;
 }
 
 /**
+ * mei_cl_notify - raise notification
+ *
+ * @cl: host client
+ *
+ * Locking: called under "dev->device_lock" lock
+ */
+void mei_cl_notify(struct mei_cl *cl)
+{
+	struct mei_device *dev;
+
+	if (!cl || !cl->dev)
+		return;
+
+	dev = cl->dev;
+
+	if (!cl->notify_en)
+		return;
+
+	cl_dbg(dev, cl, "notify event");
+	cl->notify_ev = true;
+	wake_up_interruptible_all(&cl->ev_wait);
+
+	if (cl->ev_async)
+		kill_fasync(&cl->ev_async, SIGIO, POLL_PRI);
+
+	mei_cl_bus_notify_event(cl);
+}
+
+/**
+ * mei_cl_notify_get - get or wait for notification event
+ *
+ * @cl: host client
+ * @block: this request is blocking
+ * @notify_ev: true if notification event was received
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev)
+{
+	struct mei_device *dev;
+	int rets;
+
+	*notify_ev = false;
+
+	if (WARN_ON(!cl || !cl->dev))
+		return -ENODEV;
+
+	dev = cl->dev;
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	if (cl->notify_ev)
+		goto out;
+
+	if (!block)
+		return -EAGAIN;
+
+	mutex_unlock(&dev->device_lock);
+	rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev);
+	mutex_lock(&dev->device_lock);
+
+	if (rets < 0)
+		return rets;
+
+out:
+	*notify_ev = cl->notify_ev;
+	cl->notify_ev = false;
+	return 0;
+}
+
+/**
  * mei_cl_read_start - the start read client message function.
  *
  * @cl: host client
@@ -1025,7 +1434,6 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp)
 {
 	struct mei_device *dev;
 	struct mei_cl_cb *cb;
-	struct mei_me_client *me_cl;
 	int rets;
 
 	if (WARN_ON(!cl || !cl->dev))
@@ -1040,27 +1448,29 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp)
 	if (!list_empty(&cl->rd_pending))
 		return -EBUSY;
 
-	me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id);
-	if (!me_cl) {
-		cl_err(dev, cl, "no such me client %d\n", cl->me_client_id);
+	if (!mei_me_cl_is_active(cl->me_cl)) {
+		cl_err(dev, cl, "no such me client\n");
 		return  -ENOTTY;
 	}
+
 	/* always allocate at least client max message */
-	length = max_t(size_t, length, me_cl->props.max_msg_length);
-	mei_me_cl_put(me_cl);
+	length = max_t(size_t, length, mei_cl_mtu(cl));
+	cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp);
+	if (!cb)
+		return -ENOMEM;
+
+	if (mei_cl_is_fixed_address(cl)) {
+		list_add_tail(&cb->list, &cl->rd_pending);
+		return 0;
+	}
 
 	rets = pm_runtime_get(dev->dev);
 	if (rets < 0 && rets != -EINPROGRESS) {
 		pm_runtime_put_noidle(dev->dev);
 		cl_err(dev, cl, "rpm: get failed %d\n", rets);
-		return rets;
+		goto nortpm;
 	}
 
-	cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp);
-	rets = cb ? 0 : -ENOMEM;
-	if (rets)
-		goto out;
-
 	if (mei_hbuf_acquire(dev)) {
 		rets = mei_hbm_cl_flow_control_req(dev, cl);
 		if (rets < 0)
@@ -1068,6 +1478,7 @@ int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp)
 
 		list_add_tail(&cb->list, &cl->rd_pending);
 	} else {
+		rets = 0;
 		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
 	}
 
@@ -1075,7 +1486,7 @@ out:
 	cl_dbg(dev, cl, "rpm: autosuspend\n");
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-
+nortpm:
 	if (rets)
 		mei_io_cb_free(cb);
 
@@ -1102,6 +1513,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
 	u32 msg_slots;
 	int slots;
 	int rets;
+	bool first_chunk;
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
@@ -1110,7 +1522,9 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
 
 	buf = &cb->buf;
 
-	rets = mei_cl_flow_ctrl_creds(cl);
+	first_chunk = cb->buf_idx == 0;
+
+	rets = first_chunk ? mei_cl_flow_ctrl_creds(cl) : 1;
 	if (rets < 0)
 		return rets;
 
@@ -1123,8 +1537,8 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
 	len = buf->size - cb->buf_idx;
 	msg_slots = mei_data2slots(len);
 
-	mei_hdr.host_addr = cl->host_client_id;
-	mei_hdr.me_addr = cl->me_client_id;
+	mei_hdr.host_addr = mei_cl_host_addr(cl);
+	mei_hdr.me_addr = mei_cl_me_id(cl);
 	mei_hdr.reserved = 0;
 	mei_hdr.internal = cb->internal;
 
@@ -1157,12 +1571,14 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
 	cb->buf_idx += mei_hdr.length;
 	cb->completed = mei_hdr.msg_complete == 1;
 
-	if (mei_hdr.msg_complete) {
+	if (first_chunk) {
 		if (mei_cl_flow_ctrl_reduce(cl))
 			return -EIO;
-		list_move_tail(&cb->list, &dev->write_waiting_list.list);
 	}
 
+	if (mei_hdr.msg_complete)
+		list_move_tail(&cb->list, &dev->write_waiting_list.list);
+
 	return 0;
 }
 
@@ -1181,6 +1597,7 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 	struct mei_device *dev;
 	struct mei_msg_data *buf;
 	struct mei_msg_hdr mei_hdr;
+	int size;
 	int rets;
 
 
@@ -1192,10 +1609,10 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 
 	dev = cl->dev;
 
-
 	buf = &cb->buf;
+	size = buf->size;
 
-	cl_dbg(dev, cl, "size=%d\n", buf->size);
+	cl_dbg(dev, cl, "size=%d\n", size);
 
 	rets = pm_runtime_get(dev->dev);
 	if (rets < 0 && rets != -EINPROGRESS) {
@@ -1207,8 +1624,8 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 	cb->buf_idx = 0;
 	cl->writing_state = MEI_IDLE;
 
-	mei_hdr.host_addr = cl->host_client_id;
-	mei_hdr.me_addr = cl->me_client_id;
+	mei_hdr.host_addr = mei_cl_host_addr(cl);
+	mei_hdr.me_addr = mei_cl_me_id(cl);
 	mei_hdr.reserved = 0;
 	mei_hdr.msg_complete = 0;
 	mei_hdr.internal = cb->internal;
@@ -1219,21 +1636,21 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 
 	if (rets == 0) {
 		cl_dbg(dev, cl, "No flow control credentials: not sending.\n");
-		rets = buf->size;
+		rets = size;
 		goto out;
 	}
 	if (!mei_hbuf_acquire(dev)) {
 		cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n");
-		rets = buf->size;
+		rets = size;
 		goto out;
 	}
 
 	/* Check for a maximum length */
-	if (buf->size > mei_hbuf_max_len(dev)) {
+	if (size > mei_hbuf_max_len(dev)) {
 		mei_hdr.length = mei_hbuf_max_len(dev);
 		mei_hdr.msg_complete = 0;
 	} else {
-		mei_hdr.length = buf->size;
+		mei_hdr.length = size;
 		mei_hdr.msg_complete = 1;
 	}
 
@@ -1241,22 +1658,21 @@ int mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, bool blocking)
 	if (rets)
 		goto err;
 
+	rets = mei_cl_flow_ctrl_reduce(cl);
+	if (rets)
+		goto err;
+
 	cl->writing_state = MEI_WRITING;
 	cb->buf_idx = mei_hdr.length;
 	cb->completed = mei_hdr.msg_complete == 1;
 
 out:
-	if (mei_hdr.msg_complete) {
-		rets = mei_cl_flow_ctrl_reduce(cl);
-		if (rets < 0)
-			goto err;
-
+	if (mei_hdr.msg_complete)
 		list_add_tail(&cb->list, &dev->write_waiting_list.list);
-	} else {
+	else
 		list_add_tail(&cb->list, &dev->write_list.list);
-	}
-
 
+	cb = NULL;
 	if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) {
 
 		mutex_unlock(&dev->device_lock);
@@ -1271,7 +1687,7 @@ out:
 		}
 	}
 
-	rets = buf->size;
+	rets = size;
 err:
 	cl_dbg(dev, cl, "rpm: autosuspend\n");
 	pm_runtime_mark_last_busy(dev->dev);
@@ -1289,20 +1705,38 @@ err:
  */
 void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
 {
-	if (cb->fop_type == MEI_FOP_WRITE) {
+	struct mei_device *dev = cl->dev;
+
+	switch (cb->fop_type) {
+	case MEI_FOP_WRITE:
 		mei_io_cb_free(cb);
-		cb = NULL;
 		cl->writing_state = MEI_WRITE_COMPLETE;
-		if (waitqueue_active(&cl->tx_wait))
+		if (waitqueue_active(&cl->tx_wait)) {
 			wake_up_interruptible(&cl->tx_wait);
+		} else {
+			pm_runtime_mark_last_busy(dev->dev);
+			pm_request_autosuspend(dev->dev);
+		}
+		break;
 
-	} else if (cb->fop_type == MEI_FOP_READ) {
+	case MEI_FOP_READ:
 		list_add_tail(&cb->list, &cl->rd_completed);
 		if (waitqueue_active(&cl->rx_wait))
 			wake_up_interruptible_all(&cl->rx_wait);
 		else
 			mei_cl_bus_rx_event(cl);
-
+		break;
+
+	case MEI_FOP_CONNECT:
+	case MEI_FOP_DISCONNECT:
+	case MEI_FOP_NOTIFY_STOP:
+	case MEI_FOP_NOTIFY_START:
+		if (waitqueue_active(&cl->wait))
+			wake_up(&cl->wait);
+
+		break;
+	default:
+		BUG_ON(0);
 	}
 }
 
@@ -1312,16 +1746,12 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
  *
  * @dev: mei device
  */
-
 void mei_cl_all_disconnect(struct mei_device *dev)
 {
 	struct mei_cl *cl;
 
-	list_for_each_entry(cl, &dev->file_list, link) {
-		cl->state = MEI_FILE_DISCONNECTED;
-		cl->mei_flow_ctrl_creds = 0;
-		cl->timer_count = 0;
-	}
+	list_for_each_entry(cl, &dev->file_list, link)
+		mei_cl_set_disconnected(cl);
 }
 
 
@@ -1343,6 +1773,12 @@ void mei_cl_all_wakeup(struct mei_device *dev)
 			cl_dbg(dev, cl, "Waking up writing client!\n");
 			wake_up_interruptible(&cl->tx_wait);
 		}
+
+		/* synchronized under device mutex */
+		if (waitqueue_active(&cl->ev_wait)) {
+			cl_dbg(dev, cl, "Waking up waiting for event clients!\n");
+			wake_up_interruptible(&cl->ev_wait);
+		}
 	}
 }
 
diff --git a/kernel/drivers/misc/mei/client.h b/kernel/drivers/misc/mei/client.h
index 0a39e5d45..04e1aa392 100644
--- a/kernel/drivers/misc/mei/client.h
+++ b/kernel/drivers/misc/mei/client.h
@@ -44,6 +44,42 @@ void mei_me_cl_rm_by_uuid_id(struct mei_device *dev,
 			     const uuid_le *uuid, u8 id);
 void mei_me_cl_rm_all(struct mei_device *dev);
 
+/**
+ * mei_me_cl_is_active - check whether me client is active in the fw
+ *
+ * @me_cl: me client
+ *
+ * Return: true if the me client is active in the firmware
+ */
+static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl)
+{
+	return !list_empty_careful(&me_cl->list);
+}
+
+/**
+ * mei_me_cl_uuid - return me client protocol name (uuid)
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol name
+ */
+static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl)
+{
+	return &me_cl->props.protocol_name;
+}
+
+/**
+ * mei_me_cl_ver - return me client protocol version
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol version
+ */
+static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.protocol_version;
+}
+
 /*
  * MEI IO Functions
  */
@@ -94,18 +130,96 @@ int mei_cl_flow_ctrl_reduce(struct mei_cl *cl);
 /**
  * mei_cl_is_connected - host client is connected
  *
- * @cl: host clinet
+ * @cl: host client
  *
- * Return: true if the host clinet is connected
+ * Return: true if the host client is connected
  */
 static inline bool mei_cl_is_connected(struct mei_cl *cl)
 {
 	return  cl->state == MEI_FILE_CONNECTED;
 }
 
-bool mei_cl_is_other_connecting(struct mei_cl *cl);
+/**
+ * mei_cl_me_id - me client id
+ *
+ * @cl: host client
+ *
+ * Return: me client id or 0 if client is not connected
+ */
+static inline u8 mei_cl_me_id(const struct mei_cl *cl)
+{
+	return cl->me_cl ? cl->me_cl->client_id : 0;
+}
+
+/**
+ * mei_cl_mtu - maximal message that client can send and receive
+ *
+ * @cl: host client
+ *
+ * Return: mtu
+ */
+static inline size_t mei_cl_mtu(const struct mei_cl *cl)
+{
+	return cl->me_cl->props.max_msg_length;
+}
+
+/**
+ * mei_cl_is_fixed_address - check whether the me client uses fixed address
+ *
+ * @cl: host client
+ *
+ * Return: true if the client is connected and it has fixed me address
+ */
+static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl)
+{
+	return cl->me_cl && cl->me_cl->props.fixed_address;
+}
+
+/**
+ * mei_cl_is_single_recv_buf - check whether the me client
+ *       uses single receiving buffer
+ *
+ * @cl: host client
+ *
+ * Return: true if single_recv_buf == 1; false otherwise
+ */
+static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl)
+{
+	return cl->me_cl->props.single_recv_buf;
+}
+
+/**
+ * mei_cl_uuid -  client's uuid
+ *
+ * @cl: host client
+ *
+ * Return: uuid of connected me client
+ */
+static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl)
+{
+	return mei_me_cl_uuid(cl->me_cl);
+}
+
+/**
+ * mei_cl_host_addr - client's host address
+ *
+ * @cl: host client
+ *
+ * Return: 0 for fixed address client, host address for dynamic client
+ */
+static inline u8 mei_cl_host_addr(const struct mei_cl *cl)
+{
+	return  mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id;
+}
+
 int mei_cl_disconnect(struct mei_cl *cl);
-int mei_cl_connect(struct mei_cl *cl, struct file *file);
+void mei_cl_set_disconnected(struct mei_cl *cl);
+int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			  struct mei_cl_cb *cmpl_list);
+int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
+		   struct file *file);
+int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
+			      struct mei_cl_cb *cmpl_list);
 int mei_cl_read_start(struct mei_cl *cl, size_t length, struct file *fp);
 int mei_cl_irq_read_msg(struct mei_cl *cl, struct mei_msg_hdr *hdr,
 			struct mei_cl_cb *cmpl_list);
@@ -117,14 +231,20 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb);
 
 void mei_host_client_init(struct work_struct *work);
 
-
+u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop);
+enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request);
+int mei_cl_notify_request(struct mei_cl *cl, struct file *file, u8 request);
+int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb,
+		      struct mei_cl_cb *cmpl_list);
+int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev);
+void mei_cl_notify(struct mei_cl *cl);
 
 void mei_cl_all_disconnect(struct mei_device *dev);
 void mei_cl_all_wakeup(struct mei_device *dev);
 void mei_cl_all_write_clear(struct mei_device *dev);
 
 #define MEI_CL_FMT "cl:host=%02d me=%02d "
-#define MEI_CL_PRM(cl) (cl)->host_client_id, (cl)->me_client_id
+#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl)
 
 #define cl_dbg(dev, cl, format, arg...) \
 	dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg)
diff --git a/kernel/drivers/misc/mei/debugfs.c b/kernel/drivers/misc/mei/debugfs.c
index d9cd7e6ee..a138d8a27 100644
--- a/kernel/drivers/misc/mei/debugfs.c
+++ b/kernel/drivers/misc/mei/debugfs.c
@@ -116,7 +116,7 @@ static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf,
 
 		pos += scnprintf(buf + pos, bufsz - pos,
 			"%2d|%2d|%4d|%5d|%2d|%2d|\n",
-			i, cl->me_client_id, cl->host_client_id, cl->state,
+			i, mei_cl_me_id(cl), cl->host_client_id, cl->state,
 			!list_empty(&cl->rd_completed), cl->writing_state);
 		i++;
 	}
@@ -149,6 +149,19 @@ static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
 			mei_dev_state_str(dev->dev_state));
 	pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n",
 			mei_hbm_state_str(dev->hbm_state));
+
+	if (dev->hbm_state == MEI_HBM_STARTED) {
+		pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n");
+		pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n",
+				 dev->hbm_f_pg_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n",
+				 dev->hbm_f_dc_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n",
+				 dev->hbm_f_dot_supported);
+		pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n",
+				 dev->hbm_f_ev_supported);
+	}
+
 	pos += scnprintf(buf + pos, bufsz - pos, "pg:  %s, %s\n",
 			mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED",
 			mei_pg_state_str(mei_pg_state(dev)));
@@ -191,6 +204,8 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
 	if (!dir)
 		return -ENOMEM;
 
+	dev->dbgfs_dir = dir;
+
 	f = debugfs_create_file("meclients", S_IRUSR, dir,
 				dev, &mei_dbgfs_fops_meclients);
 	if (!f) {
@@ -200,7 +215,7 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
 	f = debugfs_create_file("active", S_IRUSR, dir,
 				dev, &mei_dbgfs_fops_active);
 	if (!f) {
-		dev_err(dev->dev, "meclients: registration failed\n");
+		dev_err(dev->dev, "active: registration failed\n");
 		goto err;
 	}
 	f = debugfs_create_file("devstate", S_IRUSR, dir,
@@ -209,7 +224,12 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
 		dev_err(dev->dev, "devstate: registration failed\n");
 		goto err;
 	}
-	dev->dbgfs_dir = dir;
+	f = debugfs_create_bool("allow_fixed_address", S_IRUSR | S_IWUSR, dir,
+				&dev->allow_fixed_address);
+	if (!f) {
+		dev_err(dev->dev, "allow_fixed_address: registration failed\n");
+		goto err;
+	}
 	return 0;
 err:
 	mei_dbgfs_deregister(dev);
diff --git a/kernel/drivers/misc/mei/hbm.c b/kernel/drivers/misc/mei/hbm.c
index 58da92565..e7b7aad09 100644
--- a/kernel/drivers/misc/mei/hbm.c
+++ b/kernel/drivers/misc/mei/hbm.c
@@ -52,6 +52,7 @@ static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status)
 	MEI_CL_CS(ALREADY_STARTED);
 	MEI_CL_CS(OUT_OF_RESOURCES);
 	MEI_CL_CS(MESSAGE_SMALL);
+	MEI_CL_CS(NOT_ALLOWED);
 	default: return "unknown";
 	}
 #undef MEI_CL_CCS
@@ -89,6 +90,7 @@ static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status)
 	case MEI_CL_CONN_ALREADY_STARTED:  return -EBUSY;
 	case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY;
 	case MEI_CL_CONN_MESSAGE_SMALL:    return -EINVAL;
+	case MEI_CL_CONN_NOT_ALLOWED:      return -EBUSY;
 	default:                           return -EINVAL;
 	}
 }
@@ -150,8 +152,8 @@ void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len)
 	memset(cmd, 0, len);
 
 	cmd->hbm_cmd = hbm_cmd;
-	cmd->host_addr = cl->host_client_id;
-	cmd->me_addr = cl->me_client_id;
+	cmd->host_addr = mei_cl_host_addr(cl);
+	cmd->me_addr = mei_cl_me_id(cl);
 }
 
 /**
@@ -188,8 +190,8 @@ int mei_hbm_cl_write(struct mei_device *dev,
 static inline
 bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd)
 {
-	return cl->host_client_id == cmd->host_addr &&
-		cl->me_client_id == cmd->me_addr;
+	return  mei_cl_host_addr(cl) == cmd->host_addr &&
+		mei_cl_me_id(cl) == cmd->me_addr;
 }
 
 /**
@@ -279,7 +281,7 @@ int mei_hbm_start_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_enum_clients_req - sends enumeration client request message.
  *
  * @dev: the device structure
@@ -299,6 +301,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev)
 	enum_req = (struct hbm_host_enum_request *)dev->wr_msg.data;
 	memset(enum_req, 0, len);
 	enum_req->hbm_cmd = HOST_ENUM_REQ_CMD;
+	enum_req->allow_add = dev->hbm_f_dc_supported;
 
 	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
 	if (ret) {
@@ -311,7 +314,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_me_cl_add - add new me client to the list
  *
  * @dev: the device structure
@@ -344,6 +347,180 @@ static int mei_hbm_me_cl_add(struct mei_device *dev,
 }
 
 /**
+ * mei_hbm_add_cl_resp - send response to fw on client add request
+ *
+ * @dev: the device structure
+ * @addr: me address
+ * @status: response status
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status)
+{
+	struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+	struct hbm_add_client_response *resp;
+	const size_t len = sizeof(struct hbm_add_client_response);
+	int ret;
+
+	dev_dbg(dev->dev, "adding client response\n");
+
+	resp = (struct hbm_add_client_response *)dev->wr_msg.data;
+
+	mei_hbm_hdr(mei_hdr, len);
+	memset(resp, 0, sizeof(struct hbm_add_client_response));
+
+	resp->hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD;
+	resp->me_addr = addr;
+	resp->status  = status;
+
+	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+	if (ret)
+		dev_err(dev->dev, "add client response write failed: ret = %d\n",
+			ret);
+	return ret;
+}
+
+/**
+ * mei_hbm_fw_add_cl_req - request from the fw to add a client
+ *
+ * @dev: the device structure
+ * @req: add client request
+ *
+ * Return: 0 on success and < 0 on failure
+ */
+static int mei_hbm_fw_add_cl_req(struct mei_device *dev,
+			      struct hbm_add_client_request *req)
+{
+	int ret;
+	u8 status = MEI_HBMS_SUCCESS;
+
+	BUILD_BUG_ON(sizeof(struct hbm_add_client_request) !=
+			sizeof(struct hbm_props_response));
+
+	ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req);
+	if (ret)
+		status = !MEI_HBMS_SUCCESS;
+
+	return mei_hbm_add_cl_resp(dev, req->me_addr, status);
+}
+
+/**
+ * mei_hbm_cl_notify_req - send notification request
+ *
+ * @dev: the device structure
+ * @cl: client to send the notification request for
+ * @start: true for start false for stop
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+			  struct mei_cl *cl, u8 start)
+{
+
+	struct mei_msg_hdr *mei_hdr = &dev->wr_msg.hdr;
+	struct hbm_notification_request *req;
+	const size_t len = sizeof(struct hbm_notification_request);
+	int ret;
+
+	mei_hbm_hdr(mei_hdr, len);
+	mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, dev->wr_msg.data, len);
+
+	req = (struct hbm_notification_request *)dev->wr_msg.data;
+	req->start = start;
+
+	ret = mei_write_message(dev, mei_hdr, dev->wr_msg.data);
+	if (ret)
+		dev_err(dev->dev, "notify request failed: ret = %d\n", ret);
+
+	return ret;
+}
+
+/**
+ *  notify_res_to_fop - convert notification response to the proper
+ *      notification FOP
+ *
+ * @cmd: client notification response command
+ *
+ * Return:  MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP;
+ */
+static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	return mei_cl_notify_req2fop(rs->start);
+}
+
+/**
+ * mei_hbm_cl_notify_start_res - update the client state according
+ *       to notify start response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification start response command
+ */
+static void mei_hbm_cl_notify_start_res(struct mei_device *dev,
+					struct mei_cl *cl,
+					struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status);
+
+	if (rs->status == MEI_HBMS_SUCCESS ||
+	    rs->status == MEI_HBMS_ALREADY_STARTED) {
+		cl->notify_en = true;
+		cl->status = 0;
+	} else {
+		cl->status = -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_cl_notify_stop_res - update the client state according
+ *       to notify stop response
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ * @cmd: client notification stop response command
+ */
+static void mei_hbm_cl_notify_stop_res(struct mei_device *dev,
+				       struct mei_cl *cl,
+				       struct mei_hbm_cl_cmd *cmd)
+{
+	struct hbm_notification_response *rs =
+		(struct hbm_notification_response *)cmd;
+
+	cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status);
+
+	if (rs->status == MEI_HBMS_SUCCESS ||
+	    rs->status == MEI_HBMS_NOT_STARTED) {
+		cl->notify_en = false;
+		cl->status = 0;
+	} else {
+		/* TODO: spec is not clear yet about other possible issues */
+		cl->status = -EINVAL;
+	}
+}
+
+/**
+ * mei_hbm_cl_notify - signal notification event
+ *
+ * @dev: the device structure
+ * @cmd: notification client message
+ */
+static void mei_hbm_cl_notify(struct mei_device *dev,
+			      struct mei_hbm_cl_cmd *cmd)
+{
+	struct mei_cl *cl;
+
+	cl = mei_hbm_cl_find_by_cmd(dev, cmd);
+	if (cl)
+		mei_cl_notify(cl);
+}
+
+/**
  * mei_hbm_prop_req - request property for a single client
  *
  * @dev: the device structure
@@ -392,7 +569,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_pg - sends pg command
  *
  * @dev: the device structure
@@ -572,7 +749,7 @@ static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl,
 	cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status);
 
 	if (rs->status == MEI_CL_DISCONN_SUCCESS)
-		cl->state = MEI_FILE_DISCONNECTED;
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
 	cl->status = 0;
 }
 
@@ -610,8 +787,11 @@ static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl,
 
 	if (rs->status == MEI_CL_CONN_SUCCESS)
 		cl->state = MEI_FILE_CONNECTED;
-	else
-		cl->state = MEI_FILE_DISCONNECTED;
+	else {
+		cl->state = MEI_FILE_DISCONNECT_REPLY;
+		if (rs->status == MEI_CL_CONN_NOT_FOUND)
+			mei_me_cl_del(dev, cl->me_cl);
+	}
 	cl->status = mei_cl_conn_status_to_errno(rs->status);
 }
 
@@ -654,6 +834,12 @@ static void mei_hbm_cl_res(struct mei_device *dev,
 	case MEI_FOP_DISCONNECT:
 		mei_hbm_cl_disconnect_res(dev, cl, rs);
 		break;
+	case MEI_FOP_NOTIFY_START:
+		mei_hbm_cl_notify_start_res(dev, cl, rs);
+		break;
+	case MEI_FOP_NOTIFY_STOP:
+		mei_hbm_cl_notify_stop_res(dev, cl, rs);
+		break;
 	default:
 		return;
 	}
@@ -680,8 +866,8 @@ static int mei_hbm_fw_disconnect_req(struct mei_device *dev,
 
 	cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req);
 	if (cl) {
-		cl_dbg(dev, cl, "disconnect request received\n");
-		cl->state = MEI_FILE_DISCONNECTED;
+		cl_dbg(dev, cl, "fw disconnect request received\n");
+		cl->state = MEI_FILE_DISCONNECTING;
 		cl->timer_count = 0;
 
 		cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL);
@@ -694,6 +880,79 @@ static int mei_hbm_fw_disconnect_req(struct mei_device *dev,
 }
 
 /**
+ * mei_hbm_pg_enter_res - PG enter response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_enter_res(struct mei_device *dev)
+{
+	if (mei_pg_state(dev) != MEI_PG_OFF ||
+	    dev->pg_event != MEI_PG_EVENT_WAIT) {
+		dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n",
+			mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+		return -EPROTO;
+	}
+
+	dev->pg_event = MEI_PG_EVENT_RECEIVED;
+	wake_up(&dev->wait_pg);
+
+	return 0;
+}
+
+/**
+ * mei_hbm_pg_resume - request runtime pm resume to exit PG
+ *
+ * @dev: the device structure.
+ */
+void mei_hbm_pg_resume(struct mei_device *dev)
+{
+	pm_request_resume(dev->dev);
+}
+EXPORT_SYMBOL_GPL(mei_hbm_pg_resume);
+
+/**
+ * mei_hbm_pg_exit_res - PG exit response received
+ *
+ * @dev: the device structure.
+ *
+ * Return: 0 on success, -EPROTO on state mismatch
+ */
+static int mei_hbm_pg_exit_res(struct mei_device *dev)
+{
+	if (mei_pg_state(dev) != MEI_PG_ON ||
+	    (dev->pg_event != MEI_PG_EVENT_WAIT &&
+	     dev->pg_event != MEI_PG_EVENT_IDLE)) {
+		dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n",
+			mei_pg_state_str(mei_pg_state(dev)), dev->pg_event);
+		return -EPROTO;
+	}
+
+	switch (dev->pg_event) {
+	case MEI_PG_EVENT_WAIT:
+		dev->pg_event = MEI_PG_EVENT_RECEIVED;
+		wake_up(&dev->wait_pg);
+		break;
+	case MEI_PG_EVENT_IDLE:
+		/*
+		 * If the driver is not waiting on this then
+		 * this is a HW initiated exit from PG.
+		 * Start runtime pm resume sequence to exit from PG.
+		 */
+		dev->pg_event = MEI_PG_EVENT_RECEIVED;
+		mei_hbm_pg_resume(dev);
+		break;
+	default:
+		WARN(1, "hbm: pg exit response: unexpected pg event = %d\n",
+		     dev->pg_event);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+/**
  * mei_hbm_config_features - check what hbm features and commands
  *        are supported by the fw
  *
@@ -709,6 +968,17 @@ static void mei_hbm_config_features(struct mei_device *dev)
 	if (dev->version.major_version == HBM_MAJOR_VERSION_PGI &&
 	    dev->version.minor_version >= HBM_MINOR_VERSION_PGI)
 		dev->hbm_f_pg_supported = 1;
+
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_DC)
+		dev->hbm_f_dc_supported = 1;
+
+	/* disconnect on connect timeout instead of link reset */
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT)
+		dev->hbm_f_dot_supported = 1;
+
+	/* Notification Event Support */
+	if (dev->version.major_version >= HBM_MAJOR_VERSION_EV)
+		dev->hbm_f_ev_supported = 1;
 }
 
 /**
@@ -740,6 +1010,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 	struct hbm_host_version_response *version_res;
 	struct hbm_props_response *props_res;
 	struct hbm_host_enum_response *enum_res;
+	struct hbm_add_client_request *add_cl_req;
+	int ret;
 
 	struct mei_hbm_cl_cmd *cl_cmd;
 	struct hbm_client_connect_request *disconnect_req;
@@ -828,24 +1100,17 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 		break;
 
 	case MEI_PG_ISOLATION_ENTRY_RES_CMD:
-		dev_dbg(dev->dev, "power gate isolation entry response received\n");
-		dev->pg_event = MEI_PG_EVENT_RECEIVED;
-		if (waitqueue_active(&dev->wait_pg))
-			wake_up(&dev->wait_pg);
+		dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n");
+		ret = mei_hbm_pg_enter_res(dev);
+		if (ret)
+			return ret;
 		break;
 
 	case MEI_PG_ISOLATION_EXIT_REQ_CMD:
-		dev_dbg(dev->dev, "power gate isolation exit request received\n");
-		dev->pg_event = MEI_PG_EVENT_RECEIVED;
-		if (waitqueue_active(&dev->wait_pg))
-			wake_up(&dev->wait_pg);
-		else
-			/*
-			* If the driver is not waiting on this then
-			* this is HW initiated exit from PG.
-			* Start runtime pm resume sequence to exit from PG.
-			*/
-			pm_request_resume(dev->dev);
+		dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n");
+		ret = mei_hbm_pg_exit_res(dev);
+		if (ret)
+			return ret;
 		break;
 
 	case HOST_CLIENT_PROPERTIES_RES_CMD:
@@ -937,6 +1202,39 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 			return -EIO;
 		}
 		break;
+
+	case MEI_HBM_ADD_CLIENT_REQ_CMD:
+		dev_dbg(dev->dev, "hbm: add client request received\n");
+		/*
+		 * after the host receives the enum_resp
+		 * message clients may be added or removed
+		 */
+		if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS ||
+		    dev->hbm_state >= MEI_HBM_STOPPED) {
+			dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n",
+				dev->dev_state, dev->hbm_state);
+			return -EPROTO;
+		}
+		add_cl_req = (struct hbm_add_client_request *)mei_msg;
+		ret = mei_hbm_fw_add_cl_req(dev, add_cl_req);
+		if (ret) {
+			dev_err(dev->dev, "hbm: add client: failed to send response %d\n",
+				ret);
+			return -EIO;
+		}
+		dev_dbg(dev->dev, "hbm: add client request processed\n");
+		break;
+
+	case MEI_HBM_NOTIFY_RES_CMD:
+		dev_dbg(dev->dev, "hbm: notify response received\n");
+		mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd));
+		break;
+
+	case MEI_HBM_NOTIFICATION_CMD:
+		dev_dbg(dev->dev, "hbm: notification\n");
+		mei_hbm_cl_notify(dev, cl_cmd);
+		break;
+
 	default:
 		BUG();
 		break;
diff --git a/kernel/drivers/misc/mei/hbm.h b/kernel/drivers/misc/mei/hbm.h
index 2544db7d1..a2025a508 100644
--- a/kernel/drivers/misc/mei/hbm.h
+++ b/kernel/drivers/misc/mei/hbm.h
@@ -54,6 +54,9 @@ int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl);
 int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl);
 bool mei_hbm_version_is_supported(struct mei_device *dev);
 int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd);
+void mei_hbm_pg_resume(struct mei_device *dev);
+int mei_hbm_cl_notify_req(struct mei_device *dev,
+			  struct mei_cl *cl, u8 request);
 
 #endif /* _MEI_HBM_H_ */
 
diff --git a/kernel/drivers/misc/mei/hw-me-regs.h b/kernel/drivers/misc/mei/hw-me-regs.h
index 9eb7ed70a..a8a68acd3 100644
--- a/kernel/drivers/misc/mei/hw-me-regs.h
+++ b/kernel/drivers/misc/mei/hw-me-regs.h
@@ -117,12 +117,17 @@
 #define MEI_DEV_ID_WPT_LP     0x9CBA  /* Wildcat Point LP */
 #define MEI_DEV_ID_WPT_LP_2   0x9CBB  /* Wildcat Point LP 2 */
 
+#define MEI_DEV_ID_SPT        0x9D3A  /* Sunrise Point */
+#define MEI_DEV_ID_SPT_2      0x9D3B  /* Sunrise Point 2 */
+#define MEI_DEV_ID_SPT_H      0xA13A  /* Sunrise Point H */
+#define MEI_DEV_ID_SPT_H_2    0xA13B  /* Sunrise Point H 2 */
 /*
  * MEI HW Section
  */
 
 /* Host Firmware Status Registers in PCI Config Space */
 #define PCI_CFG_HFS_1         0x40
+#  define PCI_CFG_HFS_1_D0I3_MSK     0x80000000
 #define PCI_CFG_HFS_2         0x48
 #define PCI_CFG_HFS_3         0x60
 #define PCI_CFG_HFS_4         0x64
@@ -140,7 +145,8 @@
 #define ME_CSR_HA  0xC
 /* H_HGC_CSR - PGI register */
 #define H_HPG_CSR  0x10
-
+/* H_D0I3C - D0I3 Control */
+#define H_D0I3C    0x800
 
 /* register bits of H_CSR (Host Control Status register) */
 /* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */
@@ -159,7 +165,14 @@
 #define H_IS              0x00000002
 /* Host Interrupt Enable */
 #define H_IE              0x00000001
+/* Host D0I3 Interrupt Enable */
+#define H_D0I3C_IE        0x00000020
+/* Host D0I3 Interrupt Status */
+#define H_D0I3C_IS        0x00000040
 
+/* H_CSR masks */
+#define H_CSR_IE_MASK     (H_IE | H_D0I3C_IE)
+#define H_CSR_IS_MASK     (H_IS | H_D0I3C_IS)
 
 /* register bits of ME_CSR_HA (ME Control Status Host Access register) */
 /* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only
@@ -183,8 +196,14 @@ access to ME_CBD */
 #define ME_IE_HRA         0x00000001
 
 
-/* register bits - H_HPG_CSR */
-#define H_HPG_CSR_PGIHEXR       0x00000001
-#define H_HPG_CSR_PGI           0x00000002
+/* H_HPG_CSR register bits */
+#define H_HPG_CSR_PGIHEXR 0x00000001
+#define H_HPG_CSR_PGI     0x00000002
+
+/* H_D0I3C register bits */
+#define H_D0I3C_CIP      0x00000001
+#define H_D0I3C_IR       0x00000002
+#define H_D0I3C_I3       0x00000004
+#define H_D0I3C_RR       0x00000008
 
 #endif /* _MEI_HW_MEI_REGS_H_ */
diff --git a/kernel/drivers/misc/mei/hw-me.c b/kernel/drivers/misc/mei/hw-me.c
index 43d7101ff..25b1997a6 100644
--- a/kernel/drivers/misc/mei/hw-me.c
+++ b/kernel/drivers/misc/mei/hw-me.c
@@ -134,11 +134,40 @@ static inline void mei_hcsr_write(struct mei_device *dev, u32 reg)
  */
 static inline void mei_hcsr_set(struct mei_device *dev, u32 reg)
 {
-	reg &= ~H_IS;
+	reg &= ~H_CSR_IS_MASK;
 	mei_hcsr_write(dev, reg);
 }
 
 /**
+ * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register
+ *
+ * @dev: the device structure
+ *
+ * Return: H_D0I3C register value (u32)
+ */
+static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
+{
+	u32 reg;
+
+	reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C);
+	trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg);
+
+	return reg;
+}
+
+/**
+ * mei_me_d0i3c_write - writes H_D0I3C register to device
+ *
+ * @dev: the device structure
+ * @reg: new register value
+ */
+static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
+{
+	trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg);
+	mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg);
+}
+
+/**
  * mei_me_fw_status - read fw status register from pci config space
  *
  * @dev: mei device
@@ -176,12 +205,25 @@ static int mei_me_fw_status(struct mei_device *dev,
  */
 static void mei_me_hw_config(struct mei_device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	struct mei_me_hw *hw = to_me_hw(dev);
-	u32 hcsr = mei_hcsr_read(dev);
+	u32 hcsr, reg;
+
 	/* Doesn't change in runtime */
+	hcsr = mei_hcsr_read(dev);
 	dev->hbuf_depth = (hcsr & H_CBD) >> 24;
 
+	reg = 0;
+	pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+	hw->d0i3_supported =
+		((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK);
+
 	hw->pg_state = MEI_PG_OFF;
+	if (hw->d0i3_supported) {
+		reg = mei_me_d0i3c_read(dev);
+		if (reg & H_D0I3C_I3)
+			hw->pg_state = MEI_PG_ON;
+	}
 }
 
 /**
@@ -208,7 +250,7 @@ static void mei_me_intr_clear(struct mei_device *dev)
 {
 	u32 hcsr = mei_hcsr_read(dev);
 
-	if ((hcsr & H_IS) == H_IS)
+	if (hcsr & H_CSR_IS_MASK)
 		mei_hcsr_write(dev, hcsr);
 }
 /**
@@ -220,7 +262,7 @@ static void mei_me_intr_enable(struct mei_device *dev)
 {
 	u32 hcsr = mei_hcsr_read(dev);
 
-	hcsr |= H_IE;
+	hcsr |= H_CSR_IE_MASK;
 	mei_hcsr_set(dev, hcsr);
 }
 
@@ -233,7 +275,7 @@ static void mei_me_intr_disable(struct mei_device *dev)
 {
 	u32 hcsr = mei_hcsr_read(dev);
 
-	hcsr  &= ~H_IE;
+	hcsr  &= ~H_CSR_IE_MASK;
 	mei_hcsr_set(dev, hcsr);
 }
 
@@ -253,57 +295,6 @@ static void mei_me_hw_reset_release(struct mei_device *dev)
 	/* complete this write before we set host ready on another CPU */
 	mmiowb();
 }
-/**
- * mei_me_hw_reset - resets fw via mei csr register.
- *
- * @dev: the device structure
- * @intr_enable: if interrupt should be enabled after reset.
- *
- * Return: always 0
- */
-static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable)
-{
-	u32 hcsr = mei_hcsr_read(dev);
-
-	/* H_RST may be found lit before reset is started,
-	 * for example if preceding reset flow hasn't completed.
-	 * In that case asserting H_RST will be ignored, therefore
-	 * we need to clean H_RST bit to start a successful reset sequence.
-	 */
-	if ((hcsr & H_RST) == H_RST) {
-		dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr);
-		hcsr &= ~H_RST;
-		mei_hcsr_set(dev, hcsr);
-		hcsr = mei_hcsr_read(dev);
-	}
-
-	hcsr |= H_RST | H_IG | H_IS;
-
-	if (intr_enable)
-		hcsr |= H_IE;
-	else
-		hcsr &= ~H_IE;
-
-	dev->recvd_hw_ready = false;
-	mei_hcsr_write(dev, hcsr);
-
-	/*
-	 * Host reads the H_CSR once to ensure that the
-	 * posted write to H_CSR completes.
-	 */
-	hcsr = mei_hcsr_read(dev);
-
-	if ((hcsr & H_RST) == 0)
-		dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr);
-
-	if ((hcsr & H_RDY) == H_RDY)
-		dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr);
-
-	if (intr_enable == false)
-		mei_me_hw_reset_release(dev);
-
-	return 0;
-}
 
 /**
  * mei_me_host_set_ready - enable device
@@ -314,7 +305,7 @@ static void mei_me_host_set_ready(struct mei_device *dev)
 {
 	u32 hcsr = mei_hcsr_read(dev);
 
-	hcsr |= H_IE | H_IG | H_RDY;
+	hcsr |= H_CSR_IE_MASK | H_IG | H_RDY;
 	mei_hcsr_set(dev, hcsr);
 }
 
@@ -601,13 +592,13 @@ static void mei_me_pg_unset(struct mei_device *dev)
 }
 
 /**
- * mei_me_pg_enter_sync - perform pg entry procedure
+ * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure
  *
  * @dev: the device structure
  *
  * Return: 0 on success an error code otherwise
  */
-int mei_me_pg_enter_sync(struct mei_device *dev)
+static int mei_me_pg_legacy_enter_sync(struct mei_device *dev)
 {
 	struct mei_me_hw *hw = to_me_hw(dev);
 	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
@@ -638,13 +629,13 @@ int mei_me_pg_enter_sync(struct mei_device *dev)
 }
 
 /**
- * mei_me_pg_exit_sync - perform pg exit procedure
+ * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure
  *
  * @dev: the device structure
  *
  * Return: 0 on success an error code otherwise
  */
-int mei_me_pg_exit_sync(struct mei_device *dev)
+static int mei_me_pg_legacy_exit_sync(struct mei_device *dev)
 {
 	struct mei_me_hw *hw = to_me_hw(dev);
 	unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
@@ -712,8 +703,12 @@ static bool mei_me_pg_in_transition(struct mei_device *dev)
  */
 static bool mei_me_pg_is_enabled(struct mei_device *dev)
 {
+	struct mei_me_hw *hw = to_me_hw(dev);
 	u32 reg = mei_me_mecsr_read(dev);
 
+	if (hw->d0i3_supported)
+		return true;
+
 	if ((reg & ME_PGIC_HRA) == 0)
 		goto notsupported;
 
@@ -723,7 +718,8 @@ static bool mei_me_pg_is_enabled(struct mei_device *dev)
 	return true;
 
 notsupported:
-	dev_dbg(dev->dev, "pg: not supported: HGP = %d hbm version %d.%d ?= %d.%d\n",
+	dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n",
+		hw->d0i3_supported,
 		!!(reg & ME_PGIC_HRA),
 		dev->version.major_version,
 		dev->version.minor_version,
@@ -734,11 +730,211 @@ notsupported:
 }
 
 /**
- * mei_me_pg_intr - perform pg processing in interrupt thread handler
+ * mei_me_d0i3_set - write d0i3 register bit on mei device.
  *
  * @dev: the device structure
+ * @intr: ask for interrupt
+ *
+ * Return: D0I3C register value
  */
-static void mei_me_pg_intr(struct mei_device *dev)
+static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr)
+{
+	u32 reg = mei_me_d0i3c_read(dev);
+
+	reg |= H_D0I3C_I3;
+	if (intr)
+		reg |= H_D0I3C_IR;
+	else
+		reg &= ~H_D0I3C_IR;
+	mei_me_d0i3c_write(dev, reg);
+	/* read it to ensure HW consistency */
+	reg = mei_me_d0i3c_read(dev);
+	return reg;
+}
+
+/**
+ * mei_me_d0i3_unset - clear d0i3 register bit on mei device.
+ *
+ * @dev: the device structure
+ *
+ * Return: D0I3C register value as read back after the write
+ */
+static u32 mei_me_d0i3_unset(struct mei_device *dev)
+{
+	u32 reg = mei_me_d0i3c_read(dev);
+
+	reg &= ~H_D0I3C_I3;
+	reg |= H_D0I3C_IR;
+	mei_me_d0i3c_write(dev, reg);
+	/* read it back to ensure HW consistency */
+	reg = mei_me_d0i3c_read(dev);
+	return reg;
+}
+
+/**
+ * mei_me_d0i3_enter_sync - perform d0i3 entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long d0i3_timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+	unsigned long pgi_timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT);
+	int ret;
+	u32 reg;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (reg & H_D0I3C_I3) {
+		/* we are in d0i3, nothing to do */
+		dev_dbg(dev->dev, "d0i3 set not needed\n");
+		ret = 0;
+		goto on;
+	}
+
+	/* PGI entry procedure */
+	dev->pg_event = MEI_PG_EVENT_WAIT;
+
+	ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD);
+	if (ret)
+		/* FIXME: should we reset here? */
+		goto out;
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_RECEIVED, pgi_timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_RECEIVED) {
+		ret = -ETIME;
+		goto out;
+	}
+	/* end PGI entry procedure */
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+	reg = mei_me_d0i3_set(dev, true);
+	if (!(reg & H_D0I3C_CIP)) {
+		dev_dbg(dev->dev, "d0i3 enter wait not needed\n");
+		ret = 0;
+		goto on;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, d0i3_timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+		reg = mei_me_d0i3c_read(dev);
+		if (!(reg & H_D0I3C_I3)) {
+			ret = -ETIME;
+			goto out;
+		}
+	}
+
+	ret = 0;
+on:
+	hw->pg_state = MEI_PG_ON;
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret);
+	return ret;
+}
+
+/**
+ * mei_me_d0i3_enter - perform d0i3 entry procedure
+ *   no hbm PG handshake
+ *   no waiting for confirmation; runs with interrupts
+ *   disabled
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_enter(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 reg;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (reg & H_D0I3C_I3) {
+		/* we are in d0i3, nothing to do */
+		dev_dbg(dev->dev, "already d0i3 : set not needed\n");
+		goto on;
+	}
+
+	mei_me_d0i3_set(dev, false);
+on:
+	hw->pg_state = MEI_PG_ON;
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+	dev_dbg(dev->dev, "d0i3 enter\n");
+	return 0;
+}
+
+/**
+ * mei_me_d0i3_exit_sync - perform d0i3 exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_d0i3_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	unsigned long timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT);
+	int ret;
+	u32 reg;
+
+	dev->pg_event = MEI_PG_EVENT_INTR_WAIT;
+
+	reg = mei_me_d0i3c_read(dev);
+	if (!(reg & H_D0I3C_I3)) {
+		/* we are not in d0i3, nothing to do */
+		dev_dbg(dev->dev, "d0i3 exit not needed\n");
+		ret = 0;
+		goto off;
+	}
+
+	reg = mei_me_d0i3_unset(dev);
+	if (!(reg & H_D0I3C_CIP)) {
+		dev_dbg(dev->dev, "d0i3 exit wait not needed\n");
+		ret = 0;
+		goto off;
+	}
+
+	mutex_unlock(&dev->device_lock);
+	wait_event_timeout(dev->wait_pg,
+		dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout);
+	mutex_lock(&dev->device_lock);
+
+	if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) {
+		reg = mei_me_d0i3c_read(dev);
+		if (reg & H_D0I3C_I3) {
+			ret = -ETIME;
+			goto out;
+		}
+	}
+
+	ret = 0;
+off:
+	hw->pg_state = MEI_PG_OFF;
+out:
+	dev->pg_event = MEI_PG_EVENT_IDLE;
+
+	dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret);
+	return ret;
+}
+
+/**
+ * mei_me_pg_legacy_intr - perform legacy pg processing
+ *			   in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_legacy_intr(struct mei_device *dev)
 {
 	struct mei_me_hw *hw = to_me_hw(dev);
 
@@ -752,6 +948,162 @@ static void mei_me_pg_intr(struct mei_device *dev)
 }
 
 /**
+ * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_d0i3_intr(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT &&
+	    (hw->intr_source & H_D0I3C_IS)) {
+		dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED;
+		if (hw->pg_state == MEI_PG_ON) {
+			hw->pg_state = MEI_PG_OFF;
+			if (dev->hbm_state != MEI_HBM_IDLE) {
+				/*
+				 * force H_RDY because it could be
+				 * wiped off during PG
+				 */
+				dev_dbg(dev->dev, "d0i3 set host ready\n");
+				mei_me_host_set_ready(dev);
+			}
+		} else {
+			hw->pg_state = MEI_PG_ON;
+		}
+
+		wake_up(&dev->wait_pg);
+	}
+
+	if (hw->pg_state == MEI_PG_ON && (hw->intr_source & H_IS)) {
+		/*
+		 * HW sent some data and we are in D0i3, so
+		 * we got here because of HW initiated exit from D0i3.
+		 * Start runtime pm resume sequence to exit low power state.
+		 */
+		dev_dbg(dev->dev, "d0i3 want resume\n");
+		mei_hbm_pg_resume(dev);
+	}
+}
+
+/**
+ * mei_me_pg_intr - perform pg processing in interrupt thread handler
+ *
+ * @dev: the device structure
+ */
+static void mei_me_pg_intr(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		mei_me_d0i3_intr(dev);
+	else
+		mei_me_pg_legacy_intr(dev);
+}
+
+/**
+ * mei_me_pg_enter_sync - perform runtime pm entry procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_enter_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		return mei_me_d0i3_enter_sync(dev);
+	else
+		return mei_me_pg_legacy_enter_sync(dev);
+}
+
+/**
+ * mei_me_pg_exit_sync - perform runtime pm exit procedure
+ *
+ * @dev: the device structure
+ *
+ * Return: 0 on success an error code otherwise
+ */
+int mei_me_pg_exit_sync(struct mei_device *dev)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (hw->d0i3_supported)
+		return mei_me_d0i3_exit_sync(dev);
+	else
+		return mei_me_pg_legacy_exit_sync(dev);
+}
+
+/**
+ * mei_me_hw_reset - resets fw via mei csr register.
+ *
+ * @dev: the device structure
+ * @intr_enable: if interrupt should be enabled after reset.
+ *
+ * Return: 0 on success an error code otherwise
+ */
+static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+	int ret;
+	u32 hcsr;
+
+	if (intr_enable) {
+		mei_me_intr_enable(dev);
+		if (hw->d0i3_supported) {
+			ret = mei_me_d0i3_exit_sync(dev);
+			if (ret)
+				return ret;
+		}
+	}
+
+	hcsr = mei_hcsr_read(dev);
+	/* H_RST may be found lit before reset is started,
+	 * for example if preceding reset flow hasn't completed.
+	 * In that case asserting H_RST will be ignored, therefore
+	 * we need to clear H_RST bit to start a successful reset sequence.
+	 */
+	if ((hcsr & H_RST) == H_RST) {
+		dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr);
+		hcsr &= ~H_RST;
+		mei_hcsr_set(dev, hcsr);
+		hcsr = mei_hcsr_read(dev);
+	}
+
+	hcsr |= H_RST | H_IG | H_CSR_IS_MASK;
+
+	if (!intr_enable)
+		hcsr &= ~H_CSR_IE_MASK;
+
+	dev->recvd_hw_ready = false;
+	mei_hcsr_write(dev, hcsr);
+
+	/*
+	 * Host reads the H_CSR once to ensure that the
+	 * posted write to H_CSR completes.
+	 */
+	hcsr = mei_hcsr_read(dev);
+
+	if ((hcsr & H_RST) == 0)
+		dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr);
+
+	if ((hcsr & H_RDY) == H_RDY)
+		dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr);
+
+	if (!intr_enable) {
+		mei_me_hw_reset_release(dev);
+		if (hw->d0i3_supported) {
+			ret = mei_me_d0i3_enter(dev);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/**
  * mei_me_irq_quick_handler - The ISR of the MEI device
  *
  * @irq: The irq number
@@ -759,16 +1111,20 @@ static void mei_me_pg_intr(struct mei_device *dev)
  *
  * Return: irqreturn_t
  */
-
 irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id)
 {
-	struct mei_device *dev = (struct mei_device *) dev_id;
-	u32 hcsr = mei_hcsr_read(dev);
+	struct mei_device *dev = (struct mei_device *)dev_id;
+	struct mei_me_hw *hw = to_me_hw(dev);
+	u32 hcsr;
 
-	if ((hcsr & H_IS) != H_IS)
+	hcsr = mei_hcsr_read(dev);
+	if (!(hcsr & H_CSR_IS_MASK))
 		return IRQ_NONE;
 
-	/* clear H_IS bit in H_CSR */
+	hw->intr_source = hcsr & H_CSR_IS_MASK;
+	dev_dbg(dev->dev, "interrupt source 0x%08X.\n", hw->intr_source);
+
+	/* clear H_IS and H_D0I3C_IS bits in H_CSR to clear the interrupts */
 	mei_hcsr_write(dev, hcsr);
 
 	return IRQ_WAKE_THREAD;
@@ -796,11 +1152,6 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 	mutex_lock(&dev->device_lock);
 	mei_io_list_init(&complete_list);
 
-	/* Ack the interrupt here
-	 * In case of MSI we don't go through the quick handler */
-	if (pci_dev_msi_enabled(to_pci_dev(dev->dev)))
-		mei_clear_interrupts(dev);
-
 	/* check if ME wants a reset */
 	if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
 		dev_warn(dev->dev, "FW not ready: resetting.\n");
diff --git a/kernel/drivers/misc/mei/hw-me.h b/kernel/drivers/misc/mei/hw-me.h
index 6022d52af..2ee14dc1b 100644
--- a/kernel/drivers/misc/mei/hw-me.h
+++ b/kernel/drivers/misc/mei/hw-me.h
@@ -50,13 +50,17 @@ struct mei_cfg {
  * struct mei_me_hw - me hw specific data
  *
  * @cfg: per device generation config and ops
- * @mem_addr:  io memory address
- * @pg_state:      power gating state
+ * @mem_addr: io memory address
+ * @intr_source: interrupt source
+ * @pg_state: power gating state
+ * @d0i3_supported: d0i3 support
  */
 struct mei_me_hw {
 	const struct mei_cfg *cfg;
 	void __iomem *mem_addr;
+	u32 intr_source;
 	enum mei_pg_state pg_state;
+	bool d0i3_supported;
 };
 
 #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw)
diff --git a/kernel/drivers/misc/mei/hw.h b/kernel/drivers/misc/mei/hw.h
index 16fef6dc4..4cebde859 100644
--- a/kernel/drivers/misc/mei/hw.h
+++ b/kernel/drivers/misc/mei/hw.h
@@ -31,14 +31,15 @@
 #define MEI_IAMTHIF_STALL_TIMER    12  /* HPS */
 #define MEI_IAMTHIF_READ_TIMER     10  /* HPS */
 
-#define MEI_PGI_TIMEOUT            1  /* PG Isolation time response 1 sec */
-#define MEI_HBM_TIMEOUT            1   /* 1 second */
+#define MEI_PGI_TIMEOUT             1  /* PG Isolation time response 1 sec */
+#define MEI_D0I3_TIMEOUT            5  /* D0i3 set/unset max response time */
+#define MEI_HBM_TIMEOUT             1  /* 1 second */
 
 /*
  * MEI Version
  */
-#define HBM_MINOR_VERSION                   1
-#define HBM_MAJOR_VERSION                   1
+#define HBM_MINOR_VERSION                   0
+#define HBM_MAJOR_VERSION                   2
 
 /*
  * MEI version with PGI support
@@ -46,6 +47,24 @@
 #define HBM_MINOR_VERSION_PGI               1
 #define HBM_MAJOR_VERSION_PGI               1
 
+/*
+ * MEI version with Dynamic clients support
+ */
+#define HBM_MINOR_VERSION_DC               0
+#define HBM_MAJOR_VERSION_DC               2
+
+/*
+ * MEI version with disconnect on connection timeout support
+ */
+#define HBM_MINOR_VERSION_DOT              0
+#define HBM_MAJOR_VERSION_DOT              2
+
+/*
+ * MEI version with notification support
+ */
+#define HBM_MINOR_VERSION_EV               0
+#define HBM_MAJOR_VERSION_EV               2
+
 /* Host bus message command opcode */
 #define MEI_HBM_CMD_OP_MSK                  0x7f
 /* Host bus message command RESPONSE */
@@ -81,6 +100,13 @@
 #define MEI_PG_ISOLATION_EXIT_REQ_CMD       0x0b
 #define MEI_PG_ISOLATION_EXIT_RES_CMD       0x8b
 
+#define MEI_HBM_ADD_CLIENT_REQ_CMD          0x0f
+#define MEI_HBM_ADD_CLIENT_RES_CMD          0x8f
+
+#define MEI_HBM_NOTIFY_REQ_CMD              0x10
+#define MEI_HBM_NOTIFY_RES_CMD              0x90
+#define MEI_HBM_NOTIFICATION_CMD            0x11
+
 /*
  * MEI Stop Reason
  * used by hbm_host_stop_request.reason
@@ -136,6 +162,7 @@ enum mei_cl_connect_status {
 	MEI_CL_CONN_ALREADY_STARTED  = MEI_HBMS_ALREADY_EXISTS,
 	MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED,
 	MEI_CL_CONN_MESSAGE_SMALL    = MEI_HBMS_INVALID_PARAMETER,
+	MEI_CL_CONN_NOT_ALLOWED      = MEI_HBMS_NOT_ALLOWED,
 };
 
 /*
@@ -213,9 +240,17 @@ struct hbm_me_stop_request {
 	u8 reserved[2];
 } __packed;
 
+/**
+ * struct hbm_host_enum_request -  enumeration request from host to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @allow_add: allow dynamic clients add (HBM version >= 2.0)
+ * @reserved: reserved
+ */
 struct hbm_host_enum_request {
 	u8 hbm_cmd;
-	u8 reserved[3];
+	u8 allow_add;
+	u8 reserved[2];
 } __packed;
 
 struct hbm_host_enum_response {
@@ -248,6 +283,38 @@ struct hbm_props_response {
 } __packed;
 
 /**
+ * struct hbm_add_client_request - request to add a client
+ *     might be sent by fw after enumeration has already completed
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @reserved: reserved
+ * @client_properties: client properties
+ */
+struct hbm_add_client_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 reserved[2];
+	struct mei_client_properties client_properties;
+} __packed;
+
+/**
+ * struct hbm_add_client_response - response to add a client
+ *     sent by the host to report client addition status to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @status: if HBMS_SUCCESS then the client can now accept connections.
+ * @reserved: reserved
+ */
+struct hbm_add_client_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 status;
+	u8 reserved[1];
+} __packed;
+
+/**
  * struct hbm_power_gate - power gate request/response
  *
  * @hbm_cmd: bus message command header
@@ -298,5 +365,62 @@ struct hbm_flow_control {
 	u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH];
 } __packed;
 
+#define MEI_HBM_NOTIFICATION_START 1
+#define MEI_HBM_NOTIFICATION_STOP  0
+/**
+ * struct hbm_notification_request - start/stop notification request
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @start:  start = 1 or stop = 0 asynchronous notifications
+ */
+struct hbm_notification_request {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 start;
+} __packed;
+
+/**
+ * struct hbm_notification_response - start/stop notification response
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr: address of the client in ME
+ * @host_addr: address of the client in the driver
+ * @status: (mei_hbm_status) response status for the request
+ *  - MEI_HBMS_SUCCESS: successful stop/start
+ *  - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found.
+ *  - MEI_HBMS_ALREADY_STARTED: for start requests for a previously
+ *                         started notification.
+ *  - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom
+ *                         asynchronous notifications are currently disabled.
+ *
+ * @start:  start = 1 or stop = 0 asynchronous notifications
+ * @reserved: reserved
+ */
+struct hbm_notification_response {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 status;
+	u8 start;
+	u8 reserved[3];
+} __packed;
+
+/**
+ * struct hbm_notification - notification event
+ *
+ * @hbm_cmd: bus message command header
+ * @me_addr:  address of the client in ME
+ * @host_addr:  address of the client in the driver
+ * @reserved: reserved for alignment
+ */
+struct hbm_notification {
+	u8 hbm_cmd;
+	u8 me_addr;
+	u8 host_addr;
+	u8 reserved[1];
+} __packed;
 
 #endif
diff --git a/kernel/drivers/misc/mei/init.c b/kernel/drivers/misc/mei/init.c
index 97353cf8d..3edafc8d3 100644
--- a/kernel/drivers/misc/mei/init.c
+++ b/kernel/drivers/misc/mei/init.c
@@ -329,12 +329,10 @@ void mei_stop(struct mei_device *dev)
 {
 	dev_dbg(dev->dev, "stopping the device.\n");
 
-	mei_cancel_work(dev);
-
-	mei_nfc_host_exit(dev);
-
 	mei_cl_bus_remove_devices(dev);
 
+	mei_cancel_work(dev);
+
 	mutex_lock(&dev->device_lock);
 
 	mei_wd_stop(dev);
@@ -361,13 +359,15 @@ bool mei_write_is_idle(struct mei_device *dev)
 {
 	bool idle = (dev->dev_state == MEI_DEV_ENABLED &&
 		list_empty(&dev->ctrl_wr_list.list) &&
-		list_empty(&dev->write_list.list));
+		list_empty(&dev->write_list.list)   &&
+		list_empty(&dev->write_waiting_list.list));
 
-	dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%d write=%d\n",
+	dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n",
 		idle,
 		mei_dev_state_str(dev->dev_state),
 		list_empty(&dev->ctrl_wr_list.list),
-		list_empty(&dev->write_list.list));
+		list_empty(&dev->write_list.list),
+		list_empty(&dev->write_waiting_list.list));
 
 	return idle;
 }
@@ -390,6 +390,7 @@ void mei_device_init(struct mei_device *dev,
 	INIT_LIST_HEAD(&dev->me_clients);
 	mutex_init(&dev->device_lock);
 	init_rwsem(&dev->me_clients_rwsem);
+	mutex_init(&dev->cl_bus_lock);
 	init_waitqueue_head(&dev->wait_hw_ready);
 	init_waitqueue_head(&dev->wait_pg);
 	init_waitqueue_head(&dev->wait_hbm_start);
diff --git a/kernel/drivers/misc/mei/interrupt.c b/kernel/drivers/misc/mei/interrupt.c
index 3f84d2edc..64b568a02 100644
--- a/kernel/drivers/misc/mei/interrupt.c
+++ b/kernel/drivers/misc/mei/interrupt.c
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/jiffies.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/mei.h>
 
@@ -65,8 +66,8 @@ EXPORT_SYMBOL_GPL(mei_irq_compl_handler);
 static inline int mei_cl_hbm_equal(struct mei_cl *cl,
 			struct mei_msg_hdr *mei_hdr)
 {
-	return cl->host_client_id == mei_hdr->host_addr &&
-		cl->me_client_id == mei_hdr->me_addr;
+	return  mei_cl_host_addr(cl) == mei_hdr->host_addr &&
+		mei_cl_me_id(cl) == mei_hdr->me_addr;
 }
 
 /**
@@ -147,6 +148,9 @@ int mei_cl_irq_read_msg(struct mei_cl *cl,
 		cb->read_time = jiffies;
 		cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx);
 		list_move_tail(&cb->list, &complete_list->list);
+	} else {
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_request_autosuspend(dev->dev);
 	}
 
 out:
@@ -180,56 +184,14 @@ static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb,
 		return -EMSGSIZE;
 
 	ret = mei_hbm_cl_disconnect_rsp(dev, cl);
-
-	cl->state = MEI_FILE_DISCONNECTED;
-	cl->status = 0;
+	mei_cl_set_disconnected(cl);
 	mei_io_cb_free(cb);
+	mei_me_cl_put(cl->me_cl);
+	cl->me_cl = NULL;
 
 	return ret;
 }
 
-
-
-/**
- * mei_cl_irq_disconnect - processes close related operation from
- *	interrupt thread context - send disconnect request
- *
- * @cl: client
- * @cb: callback block.
- * @cmpl_list: complete list.
- *
- * Return: 0, OK; otherwise, error.
- */
-static int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb,
-			    struct mei_cl_cb *cmpl_list)
-{
-	struct mei_device *dev = cl->dev;
-	u32 msg_slots;
-	int slots;
-
-	msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
-	slots = mei_hbuf_empty_slots(dev);
-
-	if (slots < msg_slots)
-		return -EMSGSIZE;
-
-	if (mei_hbm_cl_disconnect_req(dev, cl)) {
-		cl->status = 0;
-		cb->buf_idx = 0;
-		list_move_tail(&cb->list, &cmpl_list->list);
-		return -EIO;
-	}
-
-	cl->state = MEI_FILE_DISCONNECTING;
-	cl->status = 0;
-	cb->buf_idx = 0;
-	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
-	cl->timer_count = MEI_CONNECT_TIMEOUT;
-
-	return 0;
-}
-
-
 /**
  * mei_cl_irq_read - processes client read related operation from the
  *	interrupt thread context - request for flow control credits
@@ -267,49 +229,6 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
 	return 0;
 }
 
-
-/**
- * mei_cl_irq_connect - send connect request in irq_thread context
- *
- * @cl: client
- * @cb: callback block.
- * @cmpl_list: complete list.
- *
- * Return: 0, OK; otherwise, error.
- */
-static int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
-			      struct mei_cl_cb *cmpl_list)
-{
-	struct mei_device *dev = cl->dev;
-	u32 msg_slots;
-	int slots;
-	int ret;
-
-	msg_slots = mei_data2slots(sizeof(struct hbm_client_connect_request));
-	slots = mei_hbuf_empty_slots(dev);
-
-	if (mei_cl_is_other_connecting(cl))
-		return 0;
-
-	if (slots < msg_slots)
-		return -EMSGSIZE;
-
-	cl->state = MEI_FILE_CONNECTING;
-
-	ret = mei_hbm_cl_connect_req(dev, cl);
-	if (ret) {
-		cl->status = ret;
-		cb->buf_idx = 0;
-		list_del_init(&cb->list);
-		return ret;
-	}
-
-	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
-	cl->timer_count = MEI_CONNECT_TIMEOUT;
-	return 0;
-}
-
-
 /**
  * mei_irq_read_handler - bottom half read routine after ISR to
  * handle the read processing.
@@ -488,6 +407,13 @@ int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list)
 			if (ret)
 				return ret;
 			break;
+
+		case MEI_FOP_NOTIFY_START:
+		case MEI_FOP_NOTIFY_STOP:
+			ret = mei_cl_irq_notify(cl, cb, cmpl_list);
+			if (ret)
+				return ret;
+			break;
 		default:
 			BUG();
 		}
@@ -509,6 +435,24 @@ int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list)
 EXPORT_SYMBOL_GPL(mei_irq_write_handler);
 
 
+/**
+ * mei_connect_timeout  - connect/disconnect timeouts
+ *
+ * @cl: host client
+ */
+static void mei_connect_timeout(struct mei_cl *cl)
+{
+	struct mei_device *dev = cl->dev;
+
+	if (cl->state == MEI_FILE_CONNECTING) {
+		if (dev->hbm_f_dot_supported) {
+			cl->state = MEI_FILE_DISCONNECT_REQUIRED;
+			wake_up(&cl->wait);
+			return;
+		}
+	}
+	mei_reset(dev);
+}
 
 /**
  * mei_timer - timer function.
@@ -549,7 +493,7 @@ void mei_timer(struct work_struct *work)
 		if (cl->timer_count) {
 			if (--cl->timer_count == 0) {
 				dev_err(dev->dev, "timer: connect/disconnect timeout.\n");
-				mei_reset(dev);
+				mei_connect_timeout(cl);
 				goto out;
 			}
 		}
diff --git a/kernel/drivers/misc/mei/main.c b/kernel/drivers/misc/mei/main.c
index e40bcd03b..80f9afcb1 100644
--- a/kernel/drivers/misc/mei/main.c
+++ b/kernel/drivers/misc/mei/main.c
@@ -94,7 +94,7 @@ static int mei_release(struct inode *inode, struct file *file)
 {
 	struct mei_cl *cl = file->private_data;
 	struct mei_device *dev;
-	int rets = 0;
+	int rets;
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
@@ -106,11 +106,8 @@ static int mei_release(struct inode *inode, struct file *file)
 		rets = mei_amthif_release(dev, file);
 		goto out;
 	}
-	if (mei_cl_is_connected(cl)) {
-		cl->state = MEI_FILE_DISCONNECTING;
-		cl_dbg(dev, cl, "disconnecting\n");
-		rets = mei_cl_disconnect(cl);
-	}
+	rets = mei_cl_disconnect(cl);
+
 	mei_cl_flush_queues(cl, file);
 	cl_dbg(dev, cl, "removing\n");
 
@@ -186,8 +183,7 @@ static ssize_t mei_read(struct file *file, char __user *ubuf,
 
 	err = mei_cl_read_start(cl, length, file);
 	if (err && err != -EBUSY) {
-		dev_dbg(dev->dev,
-			"mei start read failure with status = %d\n", err);
+		cl_dbg(dev, cl, "mei start read failure status = %d\n", err);
 		rets = err;
 		goto out;
 	}
@@ -218,6 +214,11 @@ static ssize_t mei_read(struct file *file, char __user *ubuf,
 
 	cb = mei_cl_read_cb(cl, file);
 	if (!cb) {
+		if (mei_cl_is_fixed_address(cl) && dev->allow_fixed_address) {
+			cb = mei_cl_read_cb(cl, NULL);
+			if (cb)
+				goto copy_buffer;
+		}
 		rets = 0;
 		goto out;
 	}
@@ -226,11 +227,11 @@ copy_buffer:
 	/* now copy the data to user space */
 	if (cb->status) {
 		rets = cb->status;
-		dev_dbg(dev->dev, "read operation failed %d\n", rets);
+		cl_dbg(dev, cl, "read operation failed %d\n", rets);
 		goto free;
 	}
 
-	dev_dbg(dev->dev, "buf.size = %d buf.idx= %ld\n",
+	cl_dbg(dev, cl, "buf.size = %d buf.idx = %ld\n",
 	    cb->buf.size, cb->buf_idx);
 	if (length == 0 || ubuf == NULL || *offset > cb->buf_idx) {
 		rets = -EMSGSIZE;
@@ -256,7 +257,7 @@ free:
 	mei_io_cb_free(cb);
 
 out:
-	dev_dbg(dev->dev, "end mei read rets= %d\n", rets);
+	cl_dbg(dev, cl, "end mei read rets = %d\n", rets);
 	mutex_unlock(&dev->device_lock);
 	return rets;
 }
@@ -274,7 +275,6 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf,
 			 size_t length, loff_t *offset)
 {
 	struct mei_cl *cl = file->private_data;
-	struct mei_me_client *me_cl = NULL;
 	struct mei_cl_cb *write_cb = NULL;
 	struct mei_device *dev;
 	unsigned long timeout = 0;
@@ -292,27 +292,27 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf,
 		goto out;
 	}
 
-	me_cl = mei_me_cl_by_uuid_id(dev, &cl->cl_uuid, cl->me_client_id);
-	if (!me_cl) {
-		rets = -ENOTTY;
+	if (!mei_cl_is_connected(cl)) {
+		cl_err(dev, cl, "is not connected");
+		rets = -ENODEV;
 		goto out;
 	}
 
-	if (length == 0) {
-		rets = 0;
+	if (!mei_me_cl_is_active(cl->me_cl)) {
+		rets = -ENOTTY;
 		goto out;
 	}
 
-	if (length > me_cl->props.max_msg_length) {
+	if (length > mei_cl_mtu(cl)) {
 		rets = -EFBIG;
 		goto out;
 	}
 
-	if (!mei_cl_is_connected(cl)) {
-		cl_err(dev, cl, "is not connected");
-		rets = -ENODEV;
+	if (length == 0) {
+		rets = 0;
 		goto out;
 	}
+
 	if (cl == &dev->iamthif_cl) {
 		write_cb = mei_amthif_find_read_list_entry(dev, file);
 
@@ -350,14 +350,12 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf,
 				"amthif write failed with status = %d\n", rets);
 			goto out;
 		}
-		mei_me_cl_put(me_cl);
 		mutex_unlock(&dev->device_lock);
 		return length;
 	}
 
 	rets = mei_cl_write(cl, write_cb, false);
 out:
-	mei_me_cl_put(me_cl);
 	mutex_unlock(&dev->device_lock);
 	if (rets < 0)
 		mei_io_cb_free(write_cb);
@@ -395,17 +393,16 @@ static int mei_ioctl_connect_client(struct file *file,
 
 	/* find ME client we're trying to connect to */
 	me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid);
-	if (!me_cl || me_cl->props.fixed_address) {
+	if (!me_cl ||
+	    (me_cl->props.fixed_address && !dev->allow_fixed_address)) {
 		dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n",
-				&data->in_client_uuid);
+			&data->in_client_uuid);
+		mei_me_cl_put(me_cl);
 		return  -ENOTTY;
 	}
 
-	cl->me_client_id = me_cl->client_id;
-	cl->cl_uuid = me_cl->props.protocol_name;
-
 	dev_dbg(dev->dev, "Connect to FW Client ID = %d\n",
-			cl->me_client_id);
+			me_cl->client_id);
 	dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n",
 			me_cl->props.protocol_version);
 	dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n",
@@ -441,7 +438,7 @@ static int mei_ioctl_connect_client(struct file *file,
 	client->protocol_version = me_cl->props.protocol_version;
 	dev_dbg(dev->dev, "Can connect?\n");
 
-	rets = mei_cl_connect(cl, file);
+	rets = mei_cl_connect(cl, me_cl, file);
 
 end:
 	mei_me_cl_put(me_cl);
@@ -449,6 +446,49 @@ end:
 }
 
 /**
+ * mei_ioctl_client_notify_request -
+ *     propagate event notification request to client
+ *
+ * @file: pointer to file structure
+ * @request: 0 - disable, 1 - enable
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_request(struct file *file, u32 request)
+{
+	struct mei_cl *cl = file->private_data;
+
+	if (request != MEI_HBM_NOTIFICATION_START &&
+	    request != MEI_HBM_NOTIFICATION_STOP)
+		return -EINVAL;
+
+	return mei_cl_notify_request(cl, file, (u8)request);
+}
+
+/**
+ * mei_ioctl_client_notify_get -  wait for notification request
+ *
+ * @file: pointer to file structure
+ * @notify_get: output; set to 1 if an event was reported, 0 otherwise
+ *
+ * Return: 0 on success , <0 on error
+ */
+static int mei_ioctl_client_notify_get(struct file *file, u32 *notify_get)
+{
+	struct mei_cl *cl = file->private_data;
+	bool notify_ev;
+	bool block = (file->f_flags & O_NONBLOCK) == 0;
+	int rets;
+
+	rets = mei_cl_notify_get(cl, block, &notify_ev);
+	if (rets)
+		return rets;
+
+	*notify_get = notify_ev ? 1 : 0;
+	return 0;
+}
+
+/**
  * mei_ioctl - the IOCTL function
  *
  * @file: pointer to file structure
@@ -462,6 +502,7 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
 	struct mei_device *dev;
 	struct mei_cl *cl = file->private_data;
 	struct mei_connect_client_data connect_data;
+	u32 notify_get, notify_req;
 	int rets;
 
 
@@ -502,6 +543,33 @@ static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data)
 
 		break;
 
+	case IOCTL_MEI_NOTIFY_SET:
+		dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n");
+		if (copy_from_user(&notify_req,
+				   (char __user *)data, sizeof(notify_req))) {
+			dev_dbg(dev->dev, "failed to copy data from userland\n");
+			rets = -EFAULT;
+			goto out;
+		}
+		rets = mei_ioctl_client_notify_request(file, notify_req);
+		break;
+
+	case IOCTL_MEI_NOTIFY_GET:
+		dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n");
+		rets = mei_ioctl_client_notify_get(file, &notify_get);
+		if (rets)
+			goto out;
+
+		dev_dbg(dev->dev, "copy connect data to user\n");
+		if (copy_to_user((char __user *)data,
+				&notify_get, sizeof(notify_get))) {
+			dev_dbg(dev->dev, "failed to copy data to userland\n");
+			rets = -EFAULT;
+			goto out;
+
+		}
+		break;
+
 	default:
 		dev_err(dev->dev, ": unsupported ioctl %d.\n", cmd);
 		rets = -ENOIOCTLCMD;
@@ -544,6 +612,7 @@ static unsigned int mei_poll(struct file *file, poll_table *wait)
 	struct mei_cl *cl = file->private_data;
 	struct mei_device *dev;
 	unsigned int mask = 0;
+	bool notify_en;
 
 	if (WARN_ON(!cl || !cl->dev))
 		return POLLERR;
@@ -552,6 +621,7 @@ static unsigned int mei_poll(struct file *file, poll_table *wait)
 
 	mutex_lock(&dev->device_lock);
 
+	notify_en = cl->notify_en && (req_events & POLLPRI);
 
 	if (dev->dev_state != MEI_DEV_ENABLED ||
 	    !mei_cl_is_connected(cl)) {
@@ -564,6 +634,12 @@ static unsigned int mei_poll(struct file *file, poll_table *wait)
 		goto out;
 	}
 
+	if (notify_en) {
+		poll_wait(file, &cl->ev_wait, wait);
+		if (cl->notify_ev)
+			mask |= POLLPRI;
+	}
+
 	if (req_events & (POLLIN | POLLRDNORM)) {
 		poll_wait(file, &cl->rx_wait, wait);
 
@@ -579,6 +655,28 @@ out:
 }
 
 /**
+ * mei_fasync - asynchronous io support
+ *
+ * @fd: file descriptor
+ * @file: pointer to file structure
+ * @band: band bitmap
+ *
+ * Return: negative on error,
+ *         0 if no changes were made,
+ *         positive if a process was added or deleted
+ */
+static int mei_fasync(int fd, struct file *file, int band)
+{
+
+	struct mei_cl *cl = file->private_data;
+
+	if (!mei_cl_is_connected(cl))
+		return -ENODEV;
+
+	return fasync_helper(fd, file, band, &cl->ev_async);
+}
+
+/**
  * fw_status_show - mei device attribute show method
  *
  * @device: device pointer
@@ -630,6 +728,7 @@ static const struct file_operations mei_fops = {
 	.release = mei_release,
 	.write = mei_write,
 	.poll = mei_poll,
+	.fasync = mei_fasync,
 	.llseek = no_llseek
 };
 
diff --git a/kernel/drivers/misc/mei/mei_dev.h b/kernel/drivers/misc/mei/mei_dev.h
index f84c39ee2..4250555d5 100644
--- a/kernel/drivers/misc/mei/mei_dev.h
+++ b/kernel/drivers/misc/mei/mei_dev.h
@@ -88,7 +88,9 @@ enum file_state {
 	MEI_FILE_CONNECTING,
 	MEI_FILE_CONNECTED,
 	MEI_FILE_DISCONNECTING,
-	MEI_FILE_DISCONNECTED
+	MEI_FILE_DISCONNECT_REPLY,
+	MEI_FILE_DISCONNECT_REQUIRED,
+	MEI_FILE_DISCONNECTED,
 };
 
 /* MEI device states */
@@ -134,6 +136,8 @@ enum mei_wd_states {
  * @MEI_FOP_CONNECT:    connect
  * @MEI_FOP_DISCONNECT: disconnect
  * @MEI_FOP_DISCONNECT_RSP: disconnect response
+ * @MEI_FOP_NOTIFY_START:   start notification
+ * @MEI_FOP_NOTIFY_STOP:    stop notification
  */
 enum mei_cb_file_ops {
 	MEI_FOP_READ = 0,
@@ -141,6 +145,8 @@ enum mei_cb_file_ops {
 	MEI_FOP_CONNECT,
 	MEI_FOP_DISCONNECT,
 	MEI_FOP_DISCONNECT_RSP,
+	MEI_FOP_NOTIFY_START,
+	MEI_FOP_NOTIFY_STOP,
 };
 
 /*
@@ -176,6 +182,8 @@ struct mei_fw_status {
  * @props: client properties
  * @client_id: me client id
  * @mei_flow_ctrl_creds: flow control credits
+ * @connect_count: number of connections to this client
+ * @bus_added: added to bus
  */
 struct mei_me_client {
 	struct list_head list;
@@ -183,6 +191,8 @@ struct mei_me_client {
 	struct mei_client_properties props;
 	u8 client_id;
 	u8 mei_flow_ctrl_creds;
+	u8 connect_count;
+	u8 bus_added;
 };
 
 
@@ -225,18 +235,21 @@ struct mei_cl_cb {
  * @tx_wait: wait queue for tx completion
  * @rx_wait: wait queue for rx completion
  * @wait:  wait queue for management operation
+ * @ev_wait: notification wait queue
+ * @ev_async: event async notification
  * @status: connection status
- * @cl_uuid: client uuid name
+ * @me_cl: fw client connected
  * @host_client_id: host id
- * @me_client_id: me/fw id
  * @mei_flow_ctrl_creds: transmit flow credentials
  * @timer_count:  watchdog timer for operation completion
+ * @reserved: reserved for alignment
+ * @notify_en: notification - enabled/disabled
+ * @notify_ev: pending notification event
  * @writing_state: state of the tx
  * @rd_pending: pending read credits
  * @rd_completed: completed read
  *
- * @device: device on the mei client bus
- * @device_link:  link to bus clients
+ * @cldev: device on the mei client bus
  */
 struct mei_cl {
 	struct list_head link;
@@ -245,47 +258,50 @@ struct mei_cl {
 	wait_queue_head_t tx_wait;
 	wait_queue_head_t rx_wait;
 	wait_queue_head_t wait;
+	wait_queue_head_t ev_wait;
+	struct fasync_struct *ev_async;
 	int status;
-	uuid_le cl_uuid;
+	struct mei_me_client *me_cl;
 	u8 host_client_id;
-	u8 me_client_id;
 	u8 mei_flow_ctrl_creds;
 	u8 timer_count;
+	u8 reserved;
+	u8 notify_en;
+	u8 notify_ev;
 	enum mei_file_transaction_states writing_state;
 	struct list_head rd_pending;
 	struct list_head rd_completed;
 
-	/* MEI CL bus data */
-	struct mei_cl_device *device;
-	struct list_head device_link;
+	struct mei_cl_device *cldev;
 };
 
-/** struct mei_hw_ops
+/**
+ * struct mei_hw_ops - hw specific ops
  *
  * @host_is_ready    : query for host readiness
-
+ *
  * @hw_is_ready      : query if hw is ready
  * @hw_reset         : reset hw
  * @hw_start         : start hw after reset
  * @hw_config        : configure hw
-
+ *
  * @fw_status        : get fw status registers
  * @pg_state         : power gating state of the device
  * @pg_in_transition : is device now in pg transition
  * @pg_is_enabled    : is power gating enabled
-
+ *
  * @intr_clear       : clear pending interrupts
  * @intr_enable      : enable interrupts
  * @intr_disable     : disable interrupts
-
+ *
  * @hbuf_free_slots  : query for write buffer empty slots
  * @hbuf_is_ready    : query if write buffer is empty
  * @hbuf_max_len     : query for write buffer max len
-
+ *
  * @write            : write a message to FW
-
+ *
  * @rdbuf_full_slots : query how many slots are filled
-
+ *
  * @read_hdr         : get first 4 bytes (header)
  * @read             : read a buffer from the FW
  */
@@ -324,75 +340,16 @@ struct mei_hw_ops {
 };
 
 /* MEI bus API*/
-
-/**
- * struct mei_cl_ops - MEI CL device ops
- * This structure allows ME host clients to implement technology
- * specific operations.
- *
- * @enable: Enable an MEI CL device. Some devices require specific
- *	HECI commands to initialize completely.
- * @disable: Disable an MEI CL device.
- * @send: Tx hook for the device. This allows ME host clients to trap
- *	the device driver buffers before actually physically
- *	pushing it to the ME.
- * @recv: Rx hook for the device. This allows ME host clients to trap the
- *	ME buffers before forwarding them to the device driver.
- */
-struct mei_cl_ops {
-	int (*enable)(struct mei_cl_device *device);
-	int (*disable)(struct mei_cl_device *device);
-	int (*send)(struct mei_cl_device *device, u8 *buf, size_t length);
-	int (*recv)(struct mei_cl_device *device, u8 *buf, size_t length);
-};
-
-struct mei_cl_device *mei_cl_add_device(struct mei_device *dev,
-					uuid_le uuid, char *name,
-					struct mei_cl_ops *ops);
-void mei_cl_remove_device(struct mei_cl_device *device);
-
-ssize_t __mei_cl_async_send(struct mei_cl *cl, u8 *buf, size_t length);
-ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length);
+void mei_cl_bus_rescan(struct mei_device *bus);
+void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
+ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
+			bool blocking);
 ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length);
 void mei_cl_bus_rx_event(struct mei_cl *cl);
-void mei_cl_bus_remove_devices(struct mei_device *dev);
+void mei_cl_bus_notify_event(struct mei_cl *cl);
+void mei_cl_bus_remove_devices(struct mei_device *bus);
 int mei_cl_bus_init(void);
 void mei_cl_bus_exit(void);
-struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *dev, uuid_le uuid);
-
-
-/**
- * struct mei_cl_device - MEI device handle
- * An mei_cl_device pointer is returned from mei_add_device()
- * and links MEI bus clients to their actual ME host client pointer.
- * Drivers for MEI devices will get an mei_cl_device pointer
- * when being probed and shall use it for doing ME bus I/O.
- *
- * @dev: linux driver model device pointer
- * @cl: mei client
- * @ops: ME transport ops
- * @event_work: async work to execute event callback
- * @event_cb: Drivers register this callback to get asynchronous ME
- *	events (e.g. Rx buffer pending) notifications.
- * @event_context: event callback run context
- * @events: Events bitmask sent to the driver.
- * @priv_data: client private data
- */
-struct mei_cl_device {
-	struct device dev;
-
-	struct mei_cl *cl;
-
-	const struct mei_cl_ops *ops;
-
-	struct work_struct event_work;
-	mei_cl_event_cb_t event_cb;
-	void *event_context;
-	unsigned long events;
-
-	void *priv_data;
-};
-
 
 /**
  * enum mei_pg_event - power gating transition events
@@ -465,7 +422,10 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @wr_msg      : the buffer for hbm control messages
  *
  * @version     : HBM protocol version in use
- * @hbm_f_pg_supported : hbm feature pgi protocol
+ * @hbm_f_pg_supported  : hbm feature pgi protocol
+ * @hbm_f_dc_supported  : hbm feature dynamic clients
+ * @hbm_f_dot_supported : hbm feature disconnect on timeout
+ * @hbm_f_ev_supported  : hbm feature event notification
  *
  * @me_clients_rwsem: rw lock over me_clients list
  * @me_clients  : list of FW clients
@@ -473,6 +433,8 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @host_clients_map : host clients id pool
  * @me_client_index : last FW client index in enumeration
  *
+ * @allow_fixed_address: allow user space to connect a fixed client
+ *
  * @wd_cl       : watchdog client
  * @wd_state    : watchdog client state
  * @wd_pending  : watchdog command is pending
@@ -485,7 +447,6 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @iamthif_cl  : amthif host client
  * @iamthif_current_cb : amthif current operation callback
  * @iamthif_open_count : number of opened amthif connections
- * @iamthif_mtu : amthif client max message length
  * @iamthif_timer : time stamp of current amthif command completion
  * @iamthif_stall_timer : timer to detect amthif hang
  * @iamthif_state : amthif processor state
@@ -495,6 +456,7 @@ const char *mei_pg_state_str(enum mei_pg_state state);
  * @reset_work  : work item for the device reset
  *
  * @device_list : mei client bus list
+ * @cl_bus_lock : client bus list lock
  *
  * @dbgfs_dir   : debugfs mei root directory
  *
@@ -557,6 +519,9 @@ struct mei_device {
 
 	struct hbm_version version;
 	unsigned int hbm_f_pg_supported:1;
+	unsigned int hbm_f_dc_supported:1;
+	unsigned int hbm_f_dot_supported:1;
+	unsigned int hbm_f_ev_supported:1;
 
 	struct rw_semaphore me_clients_rwsem;
 	struct list_head me_clients;
@@ -564,6 +529,8 @@ struct mei_device {
 	DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX);
 	unsigned long me_client_index;
 
+	bool allow_fixed_address;
+
 	struct mei_cl wd_cl;
 	enum mei_wd_states wd_state;
 	bool wd_pending;
@@ -579,7 +546,6 @@ struct mei_device {
 	struct mei_cl iamthif_cl;
 	struct mei_cl_cb *iamthif_current_cb;
 	long iamthif_open_count;
-	int iamthif_mtu;
 	unsigned long iamthif_timer;
 	u32 iamthif_stall_timer;
 	enum iamthif_states iamthif_state;
@@ -590,6 +556,7 @@ struct mei_device {
 
 	/* List of bus devices */
 	struct list_head device_list;
+	struct mutex cl_bus_lock;
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 	struct dentry *dbgfs_dir;
@@ -658,7 +625,7 @@ void mei_irq_compl_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list);
  */
 void mei_amthif_reset_params(struct mei_device *dev);
 
-int mei_amthif_host_init(struct mei_device *dev);
+int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
 
 int mei_amthif_read(struct mei_device *dev, struct file *file,
 		char __user *ubuf, size_t length, loff_t *offset);
@@ -685,7 +652,7 @@ int mei_amthif_irq_read(struct mei_device *dev, s32 *slots);
 /*
  * NFC functions
  */
-int mei_nfc_host_init(struct mei_device *dev);
+int mei_nfc_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
 void mei_nfc_host_exit(struct mei_device *dev);
 
 /*
@@ -695,7 +662,7 @@ extern const uuid_le mei_nfc_guid;
 
 int mei_wd_send(struct mei_device *dev);
 int mei_wd_stop(struct mei_device *dev);
-int mei_wd_host_init(struct mei_device *dev);
+int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
 /*
  * mei_watchdog_register  - Registering watchdog interface
  *   once we got connection to the WD Client
diff --git a/kernel/drivers/misc/mei/nfc.c b/kernel/drivers/misc/mei/nfc.c
deleted file mode 100644
index c3bcb6368..000000000
--- a/kernel/drivers/misc/mei/nfc.c
+++ /dev/null
@@ -1,593 +0,0 @@
-/*
- *
- * Intel Management Engine Interface (Intel MEI) Linux driver
- * Copyright (c) 2003-2013, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-
-#include <linux/mei_cl_bus.h>
-
-#include "mei_dev.h"
-#include "client.h"
-
-struct mei_nfc_cmd {
-	u8 command;
-	u8 status;
-	u16 req_id;
-	u32 reserved;
-	u16 data_size;
-	u8 sub_command;
-	u8 data[];
-} __packed;
-
-struct mei_nfc_reply {
-	u8 command;
-	u8 status;
-	u16 req_id;
-	u32 reserved;
-	u16 data_size;
-	u8 sub_command;
-	u8 reply_status;
-	u8 data[];
-} __packed;
-
-struct mei_nfc_if_version {
-	u8 radio_version_sw[3];
-	u8 reserved[3];
-	u8 radio_version_hw[3];
-	u8 i2c_addr;
-	u8 fw_ivn;
-	u8 vendor_id;
-	u8 radio_type;
-} __packed;
-
-struct mei_nfc_connect {
-	u8 fw_ivn;
-	u8 vendor_id;
-} __packed;
-
-struct mei_nfc_connect_resp {
-	u8 fw_ivn;
-	u8 vendor_id;
-	u16 me_major;
-	u16 me_minor;
-	u16 me_hotfix;
-	u16 me_build;
-} __packed;
-
-struct mei_nfc_hci_hdr {
-	u8 cmd;
-	u8 status;
-	u16 req_id;
-	u32 reserved;
-	u16 data_size;
-} __packed;
-
-#define MEI_NFC_CMD_MAINTENANCE 0x00
-#define MEI_NFC_CMD_HCI_SEND 0x01
-#define MEI_NFC_CMD_HCI_RECV 0x02
-
-#define MEI_NFC_SUBCMD_CONNECT    0x00
-#define MEI_NFC_SUBCMD_IF_VERSION 0x01
-
-#define MEI_NFC_HEADER_SIZE 10
-
-/**
- * struct mei_nfc_dev - NFC mei device
- *
- * @cl: NFC host client
- * @cl_info: NFC info host client
- * @init_work: perform connection to the info client
- * @send_wq: send completion wait queue
- * @fw_ivn: NFC Interface Version Number
- * @vendor_id: NFC manufacturer ID
- * @radio_type: NFC radio type
- * @bus_name: bus name
- *
- * @req_id:  message counter
- * @recv_req_id: reception message counter
- */
-struct mei_nfc_dev {
-	struct mei_cl *cl;
-	struct mei_cl *cl_info;
-	struct work_struct init_work;
-	wait_queue_head_t send_wq;
-	u8 fw_ivn;
-	u8 vendor_id;
-	u8 radio_type;
-	char *bus_name;
-
-	u16 req_id;
-	u16 recv_req_id;
-};
-
-/* UUIDs for NFC F/W clients */
-const uuid_le mei_nfc_guid = UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50,
-				     0x94, 0xd4, 0x50, 0x26,
-				     0x67, 0x23, 0x77, 0x5c);
-
-static const uuid_le mei_nfc_info_guid = UUID_LE(0xd2de1625, 0x382d, 0x417d,
-					0x48, 0xa4, 0xef, 0xab,
-					0xba, 0x8a, 0x12, 0x06);
-
-/* Vendors */
-#define MEI_NFC_VENDOR_INSIDE 0x00
-#define MEI_NFC_VENDOR_NXP    0x01
-
-/* Radio types */
-#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00
-#define MEI_NFC_VENDOR_NXP_PN544    0x01
-
-static void mei_nfc_free(struct mei_nfc_dev *ndev)
-{
-	if (!ndev)
-		return;
-
-	if (ndev->cl) {
-		list_del(&ndev->cl->device_link);
-		mei_cl_unlink(ndev->cl);
-		kfree(ndev->cl);
-	}
-
-	if (ndev->cl_info) {
-		list_del(&ndev->cl_info->device_link);
-		mei_cl_unlink(ndev->cl_info);
-		kfree(ndev->cl_info);
-	}
-
-	kfree(ndev);
-}
-
-static int mei_nfc_build_bus_name(struct mei_nfc_dev *ndev)
-{
-	struct mei_device *dev;
-
-	if (!ndev->cl)
-		return -ENODEV;
-
-	dev = ndev->cl->dev;
-
-	switch (ndev->vendor_id) {
-	case MEI_NFC_VENDOR_INSIDE:
-		switch (ndev->radio_type) {
-		case MEI_NFC_VENDOR_INSIDE_UREAD:
-			ndev->bus_name = "microread";
-			return 0;
-
-		default:
-			dev_err(dev->dev, "Unknown radio type 0x%x\n",
-				ndev->radio_type);
-
-			return -EINVAL;
-		}
-
-	case MEI_NFC_VENDOR_NXP:
-		switch (ndev->radio_type) {
-		case MEI_NFC_VENDOR_NXP_PN544:
-			ndev->bus_name = "pn544";
-			return 0;
-		default:
-			dev_err(dev->dev, "Unknown radio type 0x%x\n",
-				ndev->radio_type);
-
-			return -EINVAL;
-		}
-
-	default:
-		dev_err(dev->dev, "Unknown vendor ID 0x%x\n",
-			ndev->vendor_id);
-
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int mei_nfc_connect(struct mei_nfc_dev *ndev)
-{
-	struct mei_device *dev;
-	struct mei_cl *cl;
-	struct mei_nfc_cmd *cmd, *reply;
-	struct mei_nfc_connect *connect;
-	struct mei_nfc_connect_resp *connect_resp;
-	size_t connect_length, connect_resp_length;
-	int bytes_recv, ret;
-
-	cl = ndev->cl;
-	dev = cl->dev;
-
-	connect_length = sizeof(struct mei_nfc_cmd) +
-			sizeof(struct mei_nfc_connect);
-
-	connect_resp_length = sizeof(struct mei_nfc_cmd) +
-			sizeof(struct mei_nfc_connect_resp);
-
-	cmd = kzalloc(connect_length, GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
-	connect = (struct mei_nfc_connect *)cmd->data;
-
-	reply = kzalloc(connect_resp_length, GFP_KERNEL);
-	if (!reply) {
-		kfree(cmd);
-		return -ENOMEM;
-	}
-
-	connect_resp = (struct mei_nfc_connect_resp *)reply->data;
-
-	cmd->command = MEI_NFC_CMD_MAINTENANCE;
-	cmd->data_size = 3;
-	cmd->sub_command = MEI_NFC_SUBCMD_CONNECT;
-	connect->fw_ivn = ndev->fw_ivn;
-	connect->vendor_id = ndev->vendor_id;
-
-	ret = __mei_cl_send(cl, (u8 *)cmd, connect_length);
-	if (ret < 0) {
-		dev_err(dev->dev, "Could not send connect cmd\n");
-		goto err;
-	}
-
-	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, connect_resp_length);
-	if (bytes_recv < 0) {
-		dev_err(dev->dev, "Could not read connect response\n");
-		ret = bytes_recv;
-		goto err;
-	}
-
-	dev_info(dev->dev, "IVN 0x%x Vendor ID 0x%x\n",
-		 connect_resp->fw_ivn, connect_resp->vendor_id);
-
-	dev_info(dev->dev, "ME FW %d.%d.%d.%d\n",
-		connect_resp->me_major, connect_resp->me_minor,
-		connect_resp->me_hotfix, connect_resp->me_build);
-
-	ret = 0;
-
-err:
-	kfree(reply);
-	kfree(cmd);
-
-	return ret;
-}
-
-static int mei_nfc_if_version(struct mei_nfc_dev *ndev)
-{
-	struct mei_device *dev;
-	struct mei_cl *cl;
-
-	struct mei_nfc_cmd cmd;
-	struct mei_nfc_reply *reply = NULL;
-	struct mei_nfc_if_version *version;
-	size_t if_version_length;
-	int bytes_recv, ret;
-
-	cl = ndev->cl_info;
-	dev = cl->dev;
-
-	memset(&cmd, 0, sizeof(struct mei_nfc_cmd));
-	cmd.command = MEI_NFC_CMD_MAINTENANCE;
-	cmd.data_size = 1;
-	cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION;
-
-	ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd));
-	if (ret < 0) {
-		dev_err(dev->dev, "Could not send IF version cmd\n");
-		return ret;
-	}
-
-	/* to be sure on the stack we alloc memory */
-	if_version_length = sizeof(struct mei_nfc_reply) +
-		sizeof(struct mei_nfc_if_version);
-
-	reply = kzalloc(if_version_length, GFP_KERNEL);
-	if (!reply)
-		return -ENOMEM;
-
-	bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
-	if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
-		dev_err(dev->dev, "Could not read IF version\n");
-		ret = -EIO;
-		goto err;
-	}
-
-	version = (struct mei_nfc_if_version *)reply->data;
-
-	ndev->fw_ivn = version->fw_ivn;
-	ndev->vendor_id = version->vendor_id;
-	ndev->radio_type = version->radio_type;
-
-err:
-	kfree(reply);
-	return ret;
-}
-
-static int mei_nfc_enable(struct mei_cl_device *cldev)
-{
-	struct mei_device *dev;
-	struct mei_nfc_dev *ndev;
-	int ret;
-
-	ndev = (struct mei_nfc_dev *)cldev->priv_data;
-	dev = ndev->cl->dev;
-
-	ret = mei_nfc_connect(ndev);
-	if (ret < 0) {
-		dev_err(dev->dev, "Could not connect to NFC");
-		return ret;
-	}
-
-	return 0;
-}
-
-static int mei_nfc_disable(struct mei_cl_device *cldev)
-{
-	return 0;
-}
-
-static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
-{
-	struct mei_device *dev;
-	struct mei_nfc_dev *ndev;
-	struct mei_nfc_hci_hdr *hdr;
-	u8 *mei_buf;
-	int err;
-
-	ndev = (struct mei_nfc_dev *) cldev->priv_data;
-	dev = ndev->cl->dev;
-
-	err = -ENOMEM;
-	mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL);
-	if (!mei_buf)
-		goto out;
-
-	hdr = (struct mei_nfc_hci_hdr *) mei_buf;
-	hdr->cmd = MEI_NFC_CMD_HCI_SEND;
-	hdr->status = 0;
-	hdr->req_id = ndev->req_id;
-	hdr->reserved = 0;
-	hdr->data_size = length;
-
-	memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length);
-	err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE);
-	if (err < 0)
-		goto out;
-
-	if (!wait_event_interruptible_timeout(ndev->send_wq,
-				ndev->recv_req_id == ndev->req_id, HZ)) {
-		dev_err(dev->dev, "NFC MEI command timeout\n");
-		err = -ETIME;
-	} else {
-		ndev->req_id++;
-	}
-out:
-	kfree(mei_buf);
-	return err;
-}
-
-static int mei_nfc_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
-{
-	struct mei_nfc_dev *ndev;
-	struct mei_nfc_hci_hdr *hci_hdr;
-	int received_length;
-
-	ndev = (struct mei_nfc_dev *)cldev->priv_data;
-
-	received_length = __mei_cl_recv(ndev->cl, buf, length);
-	if (received_length < 0)
-		return received_length;
-
-	hci_hdr = (struct mei_nfc_hci_hdr *) buf;
-
-	if (hci_hdr->cmd == MEI_NFC_CMD_HCI_SEND) {
-		ndev->recv_req_id = hci_hdr->req_id;
-		wake_up(&ndev->send_wq);
-
-		return 0;
-	}
-
-	return received_length;
-}
-
-static struct mei_cl_ops nfc_ops = {
-	.enable = mei_nfc_enable,
-	.disable = mei_nfc_disable,
-	.send = mei_nfc_send,
-	.recv = mei_nfc_recv,
-};
-
-static void mei_nfc_init(struct work_struct *work)
-{
-	struct mei_device *dev;
-	struct mei_cl_device *cldev;
-	struct mei_nfc_dev *ndev;
-	struct mei_cl *cl_info;
-
-	ndev = container_of(work, struct mei_nfc_dev, init_work);
-
-	cl_info = ndev->cl_info;
-	dev = cl_info->dev;
-
-	mutex_lock(&dev->device_lock);
-
-	if (mei_cl_connect(cl_info, NULL) < 0) {
-		mutex_unlock(&dev->device_lock);
-		dev_err(dev->dev, "Could not connect to the NFC INFO ME client");
-
-		goto err;
-	}
-
-	mutex_unlock(&dev->device_lock);
-
-	if (mei_nfc_if_version(ndev) < 0) {
-		dev_err(dev->dev, "Could not get the NFC interface version");
-
-		goto err;
-	}
-
-	dev_info(dev->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n",
-		ndev->fw_ivn, ndev->vendor_id, ndev->radio_type);
-
-	mutex_lock(&dev->device_lock);
-
-	if (mei_cl_disconnect(cl_info) < 0) {
-		mutex_unlock(&dev->device_lock);
-		dev_err(dev->dev, "Could not disconnect the NFC INFO ME client");
-
-		goto err;
-	}
-
-	mutex_unlock(&dev->device_lock);
-
-	if (mei_nfc_build_bus_name(ndev) < 0) {
-		dev_err(dev->dev, "Could not build the bus ID name\n");
-		return;
-	}
-
-	cldev = mei_cl_add_device(dev, mei_nfc_guid, ndev->bus_name, &nfc_ops);
-	if (!cldev) {
-		dev_err(dev->dev, "Could not add the NFC device to the MEI bus\n");
-
-		goto err;
-	}
-
-	cldev->priv_data = ndev;
-
-
-	return;
-
-err:
-	mutex_lock(&dev->device_lock);
-	mei_nfc_free(ndev);
-	mutex_unlock(&dev->device_lock);
-
-}
-
-
-int mei_nfc_host_init(struct mei_device *dev)
-{
-	struct mei_nfc_dev *ndev;
-	struct mei_cl *cl_info, *cl;
-	struct mei_me_client *me_cl = NULL;
-	int ret;
-
-
-	/* in case of internal reset bail out
-	 * as the device is already setup
-	 */
-	cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid);
-	if (cl)
-		return 0;
-
-	ndev = kzalloc(sizeof(struct mei_nfc_dev), GFP_KERNEL);
-	if (!ndev) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	/* check for valid client id */
-	me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_info_guid);
-	if (!me_cl) {
-		dev_info(dev->dev, "nfc: failed to find the client\n");
-		ret = -ENOTTY;
-		goto err;
-	}
-
-	cl_info = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY);
-	if (IS_ERR(cl_info)) {
-		ret = PTR_ERR(cl_info);
-		goto err;
-	}
-
-	cl_info->me_client_id = me_cl->client_id;
-	cl_info->cl_uuid = me_cl->props.protocol_name;
-	mei_me_cl_put(me_cl);
-	me_cl = NULL;
-
-	list_add_tail(&cl_info->device_link, &dev->device_list);
-
-	ndev->cl_info = cl_info;
-
-	/* check for valid client id */
-	me_cl = mei_me_cl_by_uuid(dev, &mei_nfc_guid);
-	if (!me_cl) {
-		dev_info(dev->dev, "nfc: failed to find the client\n");
-		ret = -ENOTTY;
-		goto err;
-	}
-
-	cl = mei_cl_alloc_linked(dev, MEI_HOST_CLIENT_ID_ANY);
-	if (IS_ERR(cl)) {
-		ret = PTR_ERR(cl);
-		goto err;
-	}
-
-	cl->me_client_id = me_cl->client_id;
-	cl->cl_uuid = me_cl->props.protocol_name;
-	mei_me_cl_put(me_cl);
-	me_cl = NULL;
-
-	list_add_tail(&cl->device_link, &dev->device_list);
-
-	ndev->cl = cl;
-
-	ndev->req_id = 1;
-
-	INIT_WORK(&ndev->init_work, mei_nfc_init);
-	init_waitqueue_head(&ndev->send_wq);
-	schedule_work(&ndev->init_work);
-
-	return 0;
-
-err:
-	mei_me_cl_put(me_cl);
-	mei_nfc_free(ndev);
-
-	return ret;
-}
-
-void mei_nfc_host_exit(struct mei_device *dev)
-{
-	struct mei_nfc_dev *ndev;
-	struct mei_cl *cl;
-	struct mei_cl_device *cldev;
-
-	cl = mei_cl_bus_find_cl_by_uuid(dev, mei_nfc_guid);
-	if (!cl)
-		return;
-
-	cldev = cl->device;
-	if (!cldev)
-		return;
-
-	ndev = (struct mei_nfc_dev *)cldev->priv_data;
-	if (ndev)
-		cancel_work_sync(&ndev->init_work);
-
-	cldev->priv_data = NULL;
-
-	mutex_lock(&dev->device_lock);
-	/* Need to remove the device here
-	 * since mei_nfc_free will unlink the clients
-	 */
-	mei_cl_remove_device(cldev);
-	mei_nfc_free(ndev);
-	mutex_unlock(&dev->device_lock);
-}
-
-
diff --git a/kernel/drivers/misc/mei/pci-me.c b/kernel/drivers/misc/mei/pci-me.c
index 23f71f5ce..27678d815 100644
--- a/kernel/drivers/misc/mei/pci-me.c
+++ b/kernel/drivers/misc/mei/pci-me.c
@@ -82,6 +82,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 	{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, mei_me_pch8_cfg)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, mei_me_pch8_cfg)},
 
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)},
+
 	/* required last entry */
 	{0, }
 };
@@ -128,6 +133,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	const struct mei_cfg *cfg = (struct mei_cfg *)(ent->driver_data);
 	struct mei_device *dev;
 	struct mei_me_hw *hw;
+	unsigned int irqflags;
 	int err;
 
 
@@ -180,17 +186,12 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_enable_msi(pdev);
 
 	 /* request and enable interrupt */
-	if (pci_dev_msi_enabled(pdev))
-		err = request_threaded_irq(pdev->irq,
-			NULL,
-			mei_me_irq_thread_handler,
-			IRQF_ONESHOT, KBUILD_MODNAME, dev);
-	else
-		err = request_threaded_irq(pdev->irq,
+	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
+	err = request_threaded_irq(pdev->irq,
 			mei_me_irq_quick_handler,
 			mei_me_irq_thread_handler,
-			IRQF_SHARED, KBUILD_MODNAME, dev);
-
+			irqflags, KBUILD_MODNAME, dev);
 	if (err) {
 		dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n",
 		       pdev->irq);
@@ -319,6 +320,7 @@ static int mei_me_pci_resume(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
 	struct mei_device *dev;
+	unsigned int irqflags;
 	int err;
 
 	dev = pci_get_drvdata(pdev);
@@ -327,17 +329,13 @@ static int mei_me_pci_resume(struct device *device)
 
 	pci_enable_msi(pdev);
 
+	irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED;
+
 	/* request and enable interrupt */
-	if (pci_dev_msi_enabled(pdev))
-		err = request_threaded_irq(pdev->irq,
-			NULL,
-			mei_me_irq_thread_handler,
-			IRQF_ONESHOT, KBUILD_MODNAME, dev);
-	else
-		err = request_threaded_irq(pdev->irq,
+	err = request_threaded_irq(pdev->irq,
 			mei_me_irq_quick_handler,
 			mei_me_irq_thread_handler,
-			IRQF_SHARED, KBUILD_MODNAME, dev);
+			irqflags, KBUILD_MODNAME, dev);
 
 	if (err) {
 		dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n",
diff --git a/kernel/drivers/misc/mei/pci-txe.c b/kernel/drivers/misc/mei/pci-txe.c
index dcfcba44b..0882c0201 100644
--- a/kernel/drivers/misc/mei/pci-txe.c
+++ b/kernel/drivers/misc/mei/pci-txe.c
@@ -338,7 +338,7 @@ static int mei_txe_pm_runtime_suspend(struct device *device)
 	 * However if device is not wakeable we do not enter
 	 * D-low state and we need to keep the interrupt kicking
 	 */
-	 if (!ret && pci_dev_run_wake(pdev))
+	if (!ret && pci_dev_run_wake(pdev))
 		mei_disable_interrupts(dev);
 
 	dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret);
diff --git a/kernel/drivers/misc/mei/wd.c b/kernel/drivers/misc/mei/wd.c
index 2725f865c..b34663883 100644
--- a/kernel/drivers/misc/mei/wd.c
+++ b/kernel/drivers/misc/mei/wd.c
@@ -50,15 +50,15 @@ static void mei_wd_set_start_timeout(struct mei_device *dev, u16 timeout)
  * mei_wd_host_init - connect to the watchdog client
  *
  * @dev: the device structure
+ * @me_cl: me client
  *
  * Return: -ENOTTY if wd client cannot be found
  *         -EIO if write has failed
  *         0 on success
  */
-int mei_wd_host_init(struct mei_device *dev)
+int mei_wd_host_init(struct mei_device *dev, struct mei_me_client *me_cl)
 {
 	struct mei_cl *cl = &dev->wd_cl;
-	struct mei_me_client *me_cl;
 	int ret;
 
 	mei_cl_init(cl, dev);
@@ -66,27 +66,13 @@ int mei_wd_host_init(struct mei_device *dev)
 	dev->wd_timeout = MEI_WD_DEFAULT_TIMEOUT;
 	dev->wd_state = MEI_WD_IDLE;
 
-
-	/* check for valid client id */
-	me_cl = mei_me_cl_by_uuid(dev, &mei_wd_guid);
-	if (!me_cl) {
-		dev_info(dev->dev, "wd: failed to find the client\n");
-		return -ENOTTY;
-	}
-
-	cl->me_client_id = me_cl->client_id;
-	cl->cl_uuid = me_cl->props.protocol_name;
-	mei_me_cl_put(me_cl);
-
 	ret = mei_cl_link(cl, MEI_WD_HOST_CLIENT_ID);
-
 	if (ret < 0) {
 		dev_info(dev->dev, "wd: failed link client\n");
 		return ret;
 	}
 
-	ret = mei_cl_connect(cl, NULL);
-
+	ret = mei_cl_connect(cl, me_cl, NULL);
 	if (ret) {
 		dev_err(dev->dev, "wd: failed to connect = %d\n", ret);
 		mei_cl_unlink(cl);
@@ -118,7 +104,7 @@ int mei_wd_send(struct mei_device *dev)
 	int ret;
 
 	hdr.host_addr = cl->host_client_id;
-	hdr.me_addr = cl->me_client_id;
+	hdr.me_addr = mei_cl_me_id(cl);
 	hdr.msg_complete = 1;
 	hdr.reserved = 0;
 	hdr.internal = 0;
@@ -378,6 +364,7 @@ int mei_watchdog_register(struct mei_device *dev)
 
 	int ret;
 
+	amt_wd_dev.parent = dev->dev;
 	/* unlock to perserve correct locking order */
 	mutex_unlock(&dev->device_lock);
 	ret = watchdog_register_device(&amt_wd_dev);
diff --git a/kernel/drivers/misc/mic/Kconfig b/kernel/drivers/misc/mic/Kconfig
index cc4eef040..40677df7f 100644
--- a/kernel/drivers/misc/mic/Kconfig
+++ b/kernel/drivers/misc/mic/Kconfig
@@ -15,11 +15,28 @@ config INTEL_MIC_BUS
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
+comment "SCIF Bus Driver"
+
+config SCIF_BUS
+	tristate "SCIF Bus Driver"
+	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	help
+	  This option is required by any driver which registers a
+	  device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST
+	  and CONFIG_INTEL_MIC_CARD.
+
+	  If you are building a host/card kernel with an Intel MIC device
+	  then say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
 comment "Intel MIC Host Driver"
 
 config INTEL_MIC_HOST
 	tristate "Intel MIC Host Driver"
-	depends on 64BIT && PCI && X86 && INTEL_MIC_BUS
+	depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
 	select VHOST_RING
 	help
 	  This enables Host Driver support for the Intel Many Integrated
@@ -39,7 +56,7 @@ comment "Intel MIC Card Driver"
 
 config INTEL_MIC_CARD
 	tristate "Intel MIC Card Driver"
-	depends on 64BIT && X86 && INTEL_MIC_BUS
+	depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
 	select VIRTIO
 	help
 	  This enables card driver support for the Intel Many Integrated
@@ -52,3 +69,41 @@ config INTEL_MIC_CARD
 
 	  For more information see
 	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "SCIF Driver"
+
+config SCIF
+	tristate "SCIF Driver"
+	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+	select IOMMU_IOVA
+	help
+	  This enables SCIF Driver support for the Intel Many Integrated
+	  Core (MIC) family of PCIe form factor coprocessor devices that
+	  run a 64 bit Linux OS. The Symmetric Communication Interface
+	  (SCIF (pronounced as skiff)) is a low level communications API
+	  across PCIe currently implemented for MIC.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+	depends on 64BIT && PCI && X86 && SCIF
+	help
+	  This enables COSM driver support for the Intel Many
+	  Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. COSM drivers implement functions such as boot,
+	  shutdown, reset and reboot of MIC devices.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
diff --git a/kernel/drivers/misc/mic/Makefile b/kernel/drivers/misc/mic/Makefile
index e9bf14875..e288a1106 100644
--- a/kernel/drivers/misc/mic/Makefile
+++ b/kernel/drivers/misc/mic/Makefile
@@ -4,4 +4,7 @@
 #
 obj-$(CONFIG_INTEL_MIC_HOST) += host/
 obj-$(CONFIG_INTEL_MIC_CARD) += card/
-obj-$(CONFIG_INTEL_MIC_BUS) += bus/
+obj-y += bus/
+obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
diff --git a/kernel/drivers/misc/mic/bus/Makefile b/kernel/drivers/misc/mic/bus/Makefile
index d85c7f2a0..761842b0d 100644
--- a/kernel/drivers/misc/mic/bus/Makefile
+++ b/kernel/drivers/misc/mic/bus/Makefile
@@ -3,3 +3,5 @@
 # Copyright(c) 2014, Intel Corporation.
 #
 obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
+obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
diff --git a/kernel/drivers/misc/mic/bus/cosm_bus.c b/kernel/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 000000000..d31d6c6e6
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+/* Bus probe hook: forward the device to its bound cosm driver's probe(). */
+static int cosm_dev_probe(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+/* Bus remove hook: call the bound driver's remove(); always returns 0. */
+static int cosm_dev_remove(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type cosm_bus = {
+	.name  = "cosm_bus",
+	.probe = cosm_dev_probe,
+	.remove = cosm_dev_remove,
+};
+
+/* Register @driver on the cosm bus; returns driver_register()'s result. */
+int cosm_register_driver(struct cosm_driver *driver)
+{
+	driver->driver.bus = &cosm_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+/* Unregister a driver previously passed to cosm_register_driver(). */
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+/* Device release callback: free the cosm_device that embeds @d. */
+static void cosm_release_dev(struct device *d)
+{
+	struct cosm_device *cdev = dev_to_cosm(d);
+
+	kfree(cdev);
+}
+
+/**
+ * cosm_register_device - allocate a cosm device and add it to the cosm bus
+ * @pdev: device set as the parent of the new cosm device
+ * @hw_ops: hardware ops stored in the new cosm device
+ *
+ * Returns the new cosm_device on success, or an ERR_PTR() carrying -ENOMEM
+ * or the error reported by ida_simple_get() or device_add().
+ */
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+	struct cosm_device *cdev;
+	int ret;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Initialize the embedded device before anything else can fail, so
+	 * that put_device() in the error paths below operates on a valid
+	 * reference count and frees cdev through cosm_release_dev().
+	 */
+	device_initialize(&cdev->dev);
+	cdev->dev.parent = pdev;
+	cdev->dev.release = cosm_release_dev;
+	cdev->hw_ops = hw_ops;
+	dev_set_drvdata(&cdev->dev, cdev);
+	cdev->dev.bus = &cosm_bus;
+
+	/* Assign a unique device index and hence name */
+	ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto free_cdev;
+
+	cdev->index = ret;
+	cdev->dev.id = ret;
+	dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+	/* Already initialized above, so device_add() is all that is left */
+	ret = device_add(&cdev->dev);
+	if (ret)
+		goto ida_remove;
+	return cdev;
+ida_remove:
+	ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+	put_device(&cdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+/* Unregister @dev from the driver core and return its index to the IDA. */
+void cosm_unregister_device(struct cosm_device *dev)
+{
+	int index = dev->index; /* save for after device release */
+
+	device_unregister(&dev->dev);
+	ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+/*
+ * Look up the cosm device with the given id on the cosm bus; NULL if none.
+ * NOTE(review): subsys_find_device_by_id() presumably returns the device
+ * with a reference held - confirm that callers drop it with put_device().
+ */
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+	struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+	return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+/* Register the cosm bus type. */
+static int __init cosm_init(void)
+{
+	return bus_register(&cosm_bus);
+}
+
+/* Unregister the cosm bus type and destroy the device index IDA. */
+static void __exit cosm_exit(void)
+{
+	bus_unregister(&cosm_bus);
+	ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/mic/bus/cosm_bus.h b/kernel/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 000000000..f7c57f266
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,134 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#ifndef _COSM_BUS_H_
+#define _COSM_BUS_H_
+
+#include <linux/scif.h>
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+/**
+ * struct cosm_device - representation of a cosm device
+ *
+ * @attr_group: Pointer to list of sysfs attribute groups.
+ * @sdev: Device for sysfs entries.
+ * @state: MIC state.
+ * @shutdown_status: MIC status reported by card for shutdown/crashes.
+ * @shutdown_status_int: Internal shutdown status maintained by the driver
+ * @cosm_mutex: Mutex for synchronizing access to data structures.
+ * @reset_trigger_work: Work for triggering reset requests.
+ * @scif_work: Work for handling per device SCIF connections
+ * @cmdline: Kernel command line.
+ * @firmware: Firmware file name.
+ * @ramdisk: Ramdisk file name.
+ * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
+ * @log_buf_addr: Log buffer address for MIC.
+ * @log_buf_len: Address of the log buffer length for MIC.
+ * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
+ * @hw_ops: the hardware bus ops for this device.
+ * @dev: underlying device.
+ * @index: unique position on the cosm bus
+ * @dbg_dir: debug fs directory
+ * @newepd: new endpoint from scif accept to be assigned to this cdev
+ * @epd: SCIF endpoint for this cdev
+ * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
+ * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
+ */
+struct cosm_device {
+	const struct attribute_group **attr_group;
+	struct device *sdev;
+	u8 state;
+	u8 shutdown_status;
+	u8 shutdown_status_int;
+	struct mutex cosm_mutex;
+	struct work_struct reset_trigger_work;
+	struct work_struct scif_work;
+	char *cmdline;
+	char *firmware;
+	char *ramdisk;
+	char *bootmode;
+	void *log_buf_addr;
+	int *log_buf_len;
+	struct kernfs_node *state_sysfs;
+	struct cosm_hw_ops *hw_ops;
+	struct device dev;
+	int index;
+	struct dentry *dbg_dir;
+	scif_epd_t newepd;
+	scif_epd_t epd;
+	bool heartbeat_watchdog_enable;
+	bool sysfs_heartbeat_enable;
+};
+
+/**
+ * struct cosm_driver - operations for a cosm driver
+ *
+ * @driver: underlying device driver (populate name and owner).
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct cosm_driver {
+	struct device_driver driver;
+	int (*probe)(struct cosm_device *dev);
+	void (*remove)(struct cosm_device *dev);
+};
+
+/**
+ * struct cosm_hw_ops - cosm bus ops
+ *
+ * @reset: trigger MIC reset
+ * @force_reset: force MIC reset
+ * @post_reset: inform MIC reset is complete
+ * @ready: is MIC ready for OS download
+ * @start: boot MIC
+ * @stop: prepare MIC for reset
+ * @family: return MIC HW family string
+ * @stepping: return MIC HW stepping string
+ * @aper: return MIC PCIe aperture
+ */
+struct cosm_hw_ops {
+	void (*reset)(struct cosm_device *cdev);
+	void (*force_reset)(struct cosm_device *cdev);
+	void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
+	bool (*ready)(struct cosm_device *cdev);
+	int (*start)(struct cosm_device *cdev, int id);
+	void (*stop)(struct cosm_device *cdev, bool force);
+	ssize_t (*family)(struct cosm_device *cdev, char *buf);
+	ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
+	struct mic_mw *(*aper)(struct cosm_device *cdev);
+};
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
+void cosm_unregister_device(struct cosm_device *dev);
+int cosm_register_driver(struct cosm_driver *drv);
+void cosm_unregister_driver(struct cosm_driver *drv);
+struct cosm_device *cosm_find_cdev_by_id(int id);
+
+static inline struct cosm_device *dev_to_cosm(struct device *dev)
+{
+	return container_of(dev, struct cosm_device, dev);
+}
+
+static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
+{
+	return container_of(drv, struct cosm_driver, driver);
+}
+#endif /* _COSM_BUS_H_ */
diff --git a/kernel/drivers/misc/mic/bus/mic_bus.c b/kernel/drivers/misc/mic/bus/mic_bus.c
index 961ae90aa..be37890ab 100644
--- a/kernel/drivers/misc/mic/bus/mic_bus.c
+++ b/kernel/drivers/misc/mic/bus/mic_bus.c
@@ -25,9 +25,6 @@
 #include <linux/idr.h>
 #include <linux/mic_bus.h>
 
-/* Unique numbering for mbus devices. */
-static DEFINE_IDA(mbus_index_ida);
-
 static ssize_t device_show(struct device *d,
 			   struct device_attribute *attr, char *buf)
 {
@@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d)
 
 struct mbus_device *
 mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
-		     struct mbus_hw_ops *hw_ops, void __iomem *mmio_va)
+		     struct mbus_hw_ops *hw_ops, int index,
+		     void __iomem *mmio_va)
 {
 	int ret;
 	struct mbus_device *mbdev;
@@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
 	mbdev->dev.release = mbus_release_dev;
 	mbdev->hw_ops = hw_ops;
 	mbdev->dev.bus = &mic_bus;
-
-	/* Assign a unique device index and hence name. */
-	ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL);
-	if (ret < 0)
-		goto free_mbdev;
-
-	mbdev->index = ret;
+	mbdev->index = index;
 	dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
 	/*
 	 * device_register() causes the bus infrastructure to look for a
@@ -180,22 +172,17 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
 	 */
 	ret = device_register(&mbdev->dev);
 	if (ret)
-		goto ida_remove;
+		goto free_mbdev;
 	return mbdev;
-ida_remove:
-	ida_simple_remove(&mbus_index_ida, mbdev->index);
 free_mbdev:
-	kfree(mbdev);
+	put_device(&mbdev->dev);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(mbus_register_device);
 
 void mbus_unregister_device(struct mbus_device *mbdev)
 {
-	int index = mbdev->index; /* save for after device release */
-
 	device_unregister(&mbdev->dev);
-	ida_simple_remove(&mbus_index_ida, index);
 }
 EXPORT_SYMBOL_GPL(mbus_unregister_device);
 
@@ -207,7 +194,6 @@ static int __init mbus_init(void)
 static void __exit mbus_exit(void)
 {
 	bus_unregister(&mic_bus);
-	ida_destroy(&mbus_index_ida);
 }
 
 core_initcall(mbus_init);
diff --git a/kernel/drivers/misc/mic/bus/scif_bus.c b/kernel/drivers/misc/mic/bus/scif_bus.c
new file mode 100644
index 000000000..ff6e01c25
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/scif_bus.c
@@ -0,0 +1,209 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+
+#include "scif_bus.h"
+
+static ssize_t device_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t vendor_show(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "0x%04x\n", dev->id.vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t modalias_show(struct device *d,
+			     struct device_attribute *attr, char *buf)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+
+	return sprintf(buf, "scif:d%08Xv%08X\n",
+		       dev->id.device, dev->id.vendor);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *scif_dev_attrs[] = {
+	&dev_attr_device.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(scif_dev);
+
+static inline int scif_id_match(const struct scif_hw_dev *dev,
+				const struct scif_hw_dev_id *id)
+{
+	if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID)
+		return 0;
+
+	return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor;
+}
+
+/*
+ * This looks through all the IDs a driver claims to support.  If any of them
+ * match, we return 1 and the kernel will call scif_dev_probe().
+ */
+static int scif_dev_match(struct device *dv, struct device_driver *dr)
+{
+	unsigned int i;
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+	const struct scif_hw_dev_id *ids;
+
+	ids = drv_to_scif(dr)->id_table;
+	for (i = 0; ids[i].device; i++)
+		if (scif_id_match(dev, &ids[i]))
+			return 1;
+	return 0;
+}
+
+static int scif_uevent(struct device *dv, struct kobj_uevent_env *env)
+{
+	struct scif_hw_dev *dev = dev_to_scif(dv);
+
+	return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X",
+			      dev->id.device, dev->id.vendor);
+}
+
+static int scif_dev_probe(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int scif_dev_remove(struct device *d)
+{
+	struct scif_hw_dev *dev = dev_to_scif(d);
+	struct scif_driver *drv = drv_to_scif(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type scif_bus = {
+	.name  = "scif_bus",
+	.match = scif_dev_match,
+	.dev_groups = scif_dev_groups,
+	.uevent = scif_uevent,
+	.probe = scif_dev_probe,
+	.remove = scif_dev_remove,
+};
+
+int scif_register_driver(struct scif_driver *driver)
+{
+	driver->driver.bus = &scif_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_register_driver);
+
+void scif_unregister_driver(struct scif_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_driver);
+
+static void scif_release_dev(struct device *d)
+{
+	struct scif_hw_dev *sdev = dev_to_scif(d);
+
+	kfree(sdev);
+}
+
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper, void *dp,
+		     void __iomem *rdp, struct dma_chan **chan, int num_chan,
+		     bool card_rel_da)
+{
+	int ret;
+	struct scif_hw_dev *sdev;
+
+	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	if (!sdev)
+		return ERR_PTR(-ENOMEM);
+
+	sdev->dev.parent = pdev;
+	sdev->id.device = id;
+	sdev->id.vendor = SCIF_DEV_ANY_ID;
+	sdev->dev.archdata.dma_ops = dma_ops;
+	sdev->dev.release = scif_release_dev;
+	sdev->hw_ops = hw_ops;
+	sdev->dnode = dnode;
+	sdev->snode = snode;
+	dev_set_drvdata(&sdev->dev, sdev);
+	sdev->dev.bus = &scif_bus;
+	sdev->mmio = mmio;
+	sdev->aper = aper;
+	sdev->dp = dp;
+	sdev->rdp = rdp;
+	sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask;
+	dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
+	sdev->dma_ch = chan;
+	sdev->num_dma_ch = num_chan;
+	sdev->card_rel_da = card_rel_da;
+	dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
+	/*
+	 * device_register() causes the bus infrastructure to look for a
+	 * matching driver.
+	 */
+	ret = device_register(&sdev->dev);
+	if (ret)
+		goto free_sdev;
+	return sdev;
+free_sdev:
+	put_device(&sdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(scif_register_device);
+
+void scif_unregister_device(struct scif_hw_dev *sdev)
+{
+	device_unregister(&sdev->dev);
+}
+EXPORT_SYMBOL_GPL(scif_unregister_device);
+
+static int __init scif_init(void)
+{
+	return bus_register(&scif_bus);
+}
+
+static void __exit scif_exit(void)
+{
+	bus_unregister(&scif_bus);
+}
+
+core_initcall(scif_init);
+module_exit(scif_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF Bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/mic/bus/scif_bus.h b/kernel/drivers/misc/mic/bus/scif_bus.h
new file mode 100644
index 000000000..94f29ac60
--- /dev/null
+++ b/kernel/drivers/misc/mic/bus/scif_bus.h
@@ -0,0 +1,133 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel Symmetric Communications Interface Bus driver.
+ */
+#ifndef _SCIF_BUS_H_
+#define _SCIF_BUS_H_
+/*
+ * Everything a scif driver needs to work with any particular scif
+ * hardware abstraction layer.
+ */
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+struct scif_hw_dev_id {
+	u32 device;
+	u32 vendor;
+};
+
+#define MIC_SCIF_DEV 1
+#define SCIF_DEV_ANY_ID 0xffffffff
+
+/**
+ * struct scif_hw_dev - representation of a hardware device abstracted for scif
+ * @hw_ops: the hardware ops supported by this device
+ * @id: the device type identification (used to match it with a driver)
+ * @mmio: MMIO memory window
+ * @aper: Aperture memory window
+ * @dev: underlying device
+ * @dnode: The destination node which this device will communicate with.
+ * @snode: The source node for this device.
+ * @dp: Self device page
+ * @rdp: Remote device page
+ * @dma_ch: Array of DMA channels
+ * @num_dma_ch: Number of DMA channels available
+ * @card_rel_da: Set to true if DMA addresses programmed in the DMA engine
+ *		are relative to the card point of view
+ */
+struct scif_hw_dev {
+	struct scif_hw_ops *hw_ops;
+	struct scif_hw_dev_id id;
+	struct mic_mw *mmio;
+	struct mic_mw *aper;
+	struct device dev;
+	u8 dnode;
+	u8 snode;
+	void *dp;
+	void __iomem *rdp;
+	struct dma_chan **dma_ch;
+	int num_dma_ch;
+	bool card_rel_da;
+};
+
+/**
+ * scif_driver - operations for a scif I/O driver
+ * @driver: underlying device driver (populate name and owner).
+ * @id_table: the ids serviced by this driver.
+ * @probe: the function to call when a device is found.  Returns 0 or -errno.
+ * @remove: the function to call when a device is removed.
+ */
+struct scif_driver {
+	struct device_driver driver;
+	const struct scif_hw_dev_id *id_table;
+	int (*probe)(struct scif_hw_dev *dev);
+	void (*remove)(struct scif_hw_dev *dev);
+};
+
+/**
+ * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus.
+ *
+ * @next_db: Obtain the next available doorbell.
+ * @request_irq: Request an interrupt on a particular doorbell.
+ * @free_irq: Free an interrupt requested previously.
+ * @ack_interrupt: acknowledge an interrupt in the ISR.
+ * @send_intr: Send an interrupt to the remote node on a specified doorbell.
+ * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell
+ * which is specifically targeted for a peer to peer node.
+ * @ioremap: Map a buffer with the specified physical address and length.
+ * @iounmap: Unmap a buffer previously mapped.
+ */
+struct scif_hw_ops {
+	int (*next_db)(struct scif_hw_dev *sdev);
+	struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev,
+					irqreturn_t (*func)(int irq,
+							    void *data),
+					const char *name, void *data,
+					int db);
+	void (*free_irq)(struct scif_hw_dev *sdev,
+			 struct mic_irq *cookie, void *data);
+	void (*ack_interrupt)(struct scif_hw_dev *sdev, int num);
+	void (*send_intr)(struct scif_hw_dev *sdev, int db);
+	void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db,
+			      struct mic_mw *mw);
+	void __iomem * (*ioremap)(struct scif_hw_dev *sdev,
+				  phys_addr_t pa, size_t len);
+	void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va);
+};
+
+int scif_register_driver(struct scif_driver *driver);
+void scif_unregister_driver(struct scif_driver *driver);
+struct scif_hw_dev *
+scif_register_device(struct device *pdev, int id,
+		     struct dma_map_ops *dma_ops,
+		     struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
+		     struct mic_mw *mmio, struct mic_mw *aper,
+		     void *dp, void __iomem *rdp,
+		     struct dma_chan **chan, int num_chan,
+		     bool card_rel_da);
+void scif_unregister_device(struct scif_hw_dev *sdev);
+
+static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
+{
+	return container_of(dev, struct scif_hw_dev, dev);
+}
+
+static inline struct scif_driver *drv_to_scif(struct device_driver *drv)
+{
+	return container_of(drv, struct scif_driver, driver);
+}
+#endif /* _SCIF_BUS_H_ */
diff --git a/kernel/drivers/misc/mic/card/mic_device.c b/kernel/drivers/misc/mic/card/mic_device.c
index 83819eee5..d0edaf7e0 100644
--- a/kernel/drivers/misc/mic/card/mic_device.c
+++ b/kernel/drivers/misc/mic/card/mic_device.c
@@ -28,6 +28,8 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/reboot.h>
+#include <linux/dmaengine.h>
+#include <linux/kmod.h>
 
 #include <linux/mic_common.h>
 #include "../common/mic_dev.h"
@@ -35,71 +37,6 @@
 #include "mic_virtio.h"
 
 static struct mic_driver *g_drv;
-static struct mic_irq *shutdown_cookie;
-
-static void mic_notify_host(u8 state)
-{
-	struct mic_driver *mdrv = g_drv;
-	struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
-	iowrite8(state, &bootparam->shutdown_status);
-	dev_dbg(mdrv->dev, "%s %d system_state %d\n",
-		__func__, __LINE__, state);
-	mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db));
-}
-
-static int mic_panic_event(struct notifier_block *this, unsigned long event,
-		void *ptr)
-{
-	struct mic_driver *mdrv = g_drv;
-	struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
-	iowrite8(-1, &bootparam->h2c_config_db);
-	iowrite8(-1, &bootparam->h2c_shutdown_db);
-	mic_notify_host(MIC_CRASHED);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block mic_panic = {
-	.notifier_call  = mic_panic_event,
-};
-
-static irqreturn_t mic_shutdown_isr(int irq, void *data)
-{
-	struct mic_driver *mdrv = g_drv;
-	struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
-	mic_ack_interrupt(&g_drv->mdev);
-	if (ioread8(&bootparam->shutdown_card))
-		orderly_poweroff(true);
-	return IRQ_HANDLED;
-}
-
-static int mic_shutdown_init(void)
-{
-	int rc = 0;
-	struct mic_driver *mdrv = g_drv;
-	struct mic_bootparam __iomem *bootparam = mdrv->dp;
-	int shutdown_db;
-
-	shutdown_db = mic_next_card_db();
-	shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL,
-					       "Shutdown", mdrv, shutdown_db);
-	if (IS_ERR(shutdown_cookie))
-		rc = PTR_ERR(shutdown_cookie);
-	else
-		iowrite8(shutdown_db, &bootparam->h2c_shutdown_db);
-	return rc;
-}
-
-static void mic_shutdown_uninit(void)
-{
-	struct mic_driver *mdrv = g_drv;
-	struct mic_bootparam __iomem *bootparam = mdrv->dp;
-
-	iowrite8(-1, &bootparam->h2c_shutdown_db);
-	mic_free_card_irq(shutdown_cookie, mdrv);
-}
 
 static int __init mic_dp_init(void)
 {
@@ -240,6 +177,111 @@ static void mic_uninit_irq(void)
 	kfree(mdrv->irq_info.irq_usage_count);
 }
 
+static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name, void *data,
+				       int db)
+{
+	return mic_request_card_irq(func, NULL, name, data, db);
+}
+
+/* scif_hw_ops free_irq hook: forwards to mic_free_card_irq(); scdev unused. */
+static void ___mic_free_irq(struct scif_hw_dev *scdev,
+			    struct mic_irq *cookie, void *data)
+{
+	mic_free_card_irq(cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_ack_interrupt(&mdrv->mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	return mic_next_card_db();
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_send_intr(&mdrv->mdev, db);
+}
+
+static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db,
+				 struct mic_mw *mw)
+{
+	mic_send_p2p_intr(db, mw);
+}
+
+static void __iomem *
+___mic_ioremap(struct scif_hw_dev *scdev,
+	       phys_addr_t pa, size_t len)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	return mic_card_map(&mdrv->mdev, pa, len);
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	struct mic_driver *mdrv = scdev_to_mdrv(scdev);
+
+	mic_card_unmap(&mdrv->mdev, va);
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.send_p2p_intr = ___mic_send_p2p_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
+static int mic_request_dma_chans(struct mic_driver *mdrv)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	request_module("mic_x100_dma");
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, NULL, NULL);
+		if (chan) {
+			mdrv->dma_ch[mdrv->num_dma_ch++] = chan;
+			if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch);
+	return mdrv->num_dma_ch;
+}
+
+static void mic_free_dma_chans(struct mic_driver *mdrv)
+{
+	int i = 0;
+
+	for (i = 0; i < mdrv->num_dma_ch; i++) {
+		dma_release_channel(mdrv->dma_ch[i]);
+		mdrv->dma_ch[i] = NULL;
+	}
+	mdrv->num_dma_ch = 0;
+}
+
 /*
  * mic_driver_init - MIC driver initialization tasks.
  *
@@ -248,13 +290,11 @@ static void mic_uninit_irq(void)
 int __init mic_driver_init(struct mic_driver *mdrv)
 {
 	int rc;
+	struct mic_bootparam __iomem *bootparam;
+	u8 node_id;
 
 	g_drv = mdrv;
-	/*
-	 * Unloading the card module is not supported. The MIC card module
-	 * handles fundamental operations like host/card initiated shutdowns
-	 * and informing the host about card crashes and cannot be unloaded.
-	 */
+	/* Unloading the card module is not supported. */
 	if (!try_module_get(mdrv->dev->driver->owner)) {
 		rc = -ENODEV;
 		goto done;
@@ -265,18 +305,31 @@ int __init mic_driver_init(struct mic_driver *mdrv)
 	rc = mic_init_irq();
 	if (rc)
 		goto dp_uninit;
-	rc = mic_shutdown_init();
-	if (rc)
+	if (!mic_request_dma_chans(mdrv)) {
+		rc = -ENODEV;
 		goto irq_uninit;
+	}
 	rc = mic_devices_init(mdrv);
 	if (rc)
-		goto shutdown_uninit;
+		goto dma_free;
+	bootparam = mdrv->dp;
+	node_id = ioread8(&bootparam->node_id);
+	mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV,
+					   NULL, &scif_hw_ops,
+					   0, node_id, &mdrv->mdev.mmio, NULL,
+					   NULL, mdrv->dp, mdrv->dma_ch,
+					   mdrv->num_dma_ch, true);
+	if (IS_ERR(mdrv->scdev)) {
+		rc = PTR_ERR(mdrv->scdev);
+		goto device_uninit;
+	}
 	mic_create_card_debug_dir(mdrv);
-	atomic_notifier_chain_register(&panic_notifier_list, &mic_panic);
 done:
 	return rc;
-shutdown_uninit:
-	mic_shutdown_uninit();
+device_uninit:
+	mic_devices_uninit(mdrv);
+dma_free:
+	mic_free_dma_chans(mdrv);
 irq_uninit:
 	mic_uninit_irq();
 dp_uninit:
@@ -294,14 +347,9 @@ put:
 void mic_driver_uninit(struct mic_driver *mdrv)
 {
 	mic_delete_card_debug_dir(mdrv);
+	scif_unregister_device(mdrv->scdev);
 	mic_devices_uninit(mdrv);
-	/*
-	 * Inform the host about the shutdown status i.e. poweroff/restart etc.
-	 * The module cannot be unloaded so the only code path to call
-	 * mic_devices_uninit(..) is the shutdown callback.
-	 */
-	mic_notify_host(system_state);
-	mic_shutdown_uninit();
+	mic_free_dma_chans(mdrv);
 	mic_uninit_irq();
 	mic_dp_uninit();
 	module_put(mdrv->dev->driver->owner);
diff --git a/kernel/drivers/misc/mic/card/mic_device.h b/kernel/drivers/misc/mic/card/mic_device.h
index 844be8fc9..1dbf83c41 100644
--- a/kernel/drivers/misc/mic/card/mic_device.h
+++ b/kernel/drivers/misc/mic/card/mic_device.h
@@ -29,9 +29,9 @@
 
 #include <linux/workqueue.h>
 #include <linux/io.h>
-#include <linux/irqreturn.h>
 #include <linux/interrupt.h>
 #include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
 
 /**
  * struct mic_intr_info - Contains h/w specific interrupt sources info
@@ -73,6 +73,9 @@ struct mic_device {
  * @irq_info: The OS specific irq information
  * @intr_info: H/W specific interrupt information.
  * @dma_mbdev: dma device on the MIC virtual bus.
+ * @dma_ch: Array of DMA channels
+ * @num_dma_ch: Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
  */
 struct mic_driver {
 	char name[20];
@@ -84,6 +87,9 @@ struct mic_driver {
 	struct mic_irq_info irq_info;
 	struct mic_intr_info intr_info;
 	struct mbus_device *dma_mbdev;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
 };
 
 /**
@@ -122,10 +128,11 @@ void mic_driver_uninit(struct mic_driver *mdrv);
 int mic_next_card_db(void);
 struct mic_irq *
 mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn,
-		     const char *name, void *data, int intr_src);
+		     const char *name, void *data, int db);
 void mic_free_card_irq(struct mic_irq *cookie, void *data);
 u32 mic_read_spad(struct mic_device *mdev, unsigned int idx);
 void mic_send_intr(struct mic_device *mdev, int doorbell);
+void mic_send_p2p_intr(int doorbell, struct mic_mw *mw);
 int mic_db_to_irq(struct mic_driver *mdrv, int db);
 u32 mic_ack_interrupt(struct mic_device *mdev);
 void mic_hw_intr_init(struct mic_driver *mdrv);
diff --git a/kernel/drivers/misc/mic/card/mic_x100.c b/kernel/drivers/misc/mic/card/mic_x100.c
index e98e537d6..b2958ce23 100644
--- a/kernel/drivers/misc/mic/card/mic_x100.c
+++ b/kernel/drivers/misc/mic/card/mic_x100.c
@@ -70,6 +70,41 @@ void mic_send_intr(struct mic_device *mdev, int doorbell)
 		       (MIC_X100_SBOX_SDBIC0 + (4 * doorbell)));
 }
 
+/*
+ * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC.
+ */
+static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell)
+{
+	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
+	u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS +
+					apic_icr_offset);
+
+	/* for MIC we need to make sure we "hit" the send_icr bit (13) */
+	apicicr_low = (apicicr_low | (1 << 13));
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, apicicr_low,
+		       MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset);
+}
+
+static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell)
+{
+	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
+	/*
+	 * Ensure that the interrupt is ordered w.r.t. previous stores
+	 * to main memory. Fence instructions are not implemented in X100
+	 * since execution is in order but a compiler barrier is still
+	 * required.
+	 */
+	wmb();
+	mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset);
+}
+
 /**
  * mic_ack_interrupt - Device specific interrupt handling.
  * @mdev: pointer to mic_device instance
@@ -91,6 +126,18 @@ static inline int mic_get_rdmasr_irq(int index)
 	return  MIC_X100_RDMASR_IRQ_BASE + index;
 }
 
+void mic_send_p2p_intr(int db, struct mic_mw *mw)
+{
+	int rdmasr_index;
+
+	if (db < MIC_X100_NUM_SBOX_IRQ) {
+		mic_x100_send_sbox_intr(mw, db);
+	} else {
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
+		mic_x100_send_rdmasr_intr(mw, rdmasr_index);
+	}
+}
+
 /**
  * mic_hw_intr_init - Initialize h/w specific interrupt
  * information.
@@ -113,11 +160,15 @@ void mic_hw_intr_init(struct mic_driver *mdrv)
 int mic_db_to_irq(struct mic_driver *mdrv, int db)
 {
 	int rdmasr_index;
+
+	/*
+	 * The total number of doorbell interrupts on the card is 16. Indices
+	 * 0-7 fall in the SBOX category and 8-15 fall in the RDMASR category.
+	 */
 	if (db < MIC_X100_NUM_SBOX_IRQ) {
 		return mic_get_sbox_irq(db);
 	} else {
-		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ +
-			MIC_X100_RDMASR_IRQ_BASE;
+		rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ;
 		return mic_get_rdmasr_irq(rdmasr_index);
 	}
 }
@@ -210,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev)
 	mic_hw_intr_init(mdrv);
 	platform_set_drvdata(pdev, mdrv);
 	mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
-					       NULL, &mbus_hw_ops,
+					       NULL, &mbus_hw_ops, 0,
 					       mdrv->mdev.mmio.va);
 	if (IS_ERR(mdrv->dma_mbdev)) {
 		rc = PTR_ERR(mdrv->dma_mbdev);
@@ -243,10 +294,16 @@ static void mic_platform_shutdown(struct platform_device *pdev)
 	mic_remove(pdev);
 }
 
+static u64 mic_dma_mask = DMA_BIT_MASK(64);
+
 static struct platform_device mic_platform_dev = {
 	.name = mic_driver_name,
 	.id   = 0,
 	.num_resources = 0,
+	.dev = {
+		.dma_mask = &mic_dma_mask,
+		.coherent_dma_mask = DMA_BIT_MASK(64),
+	},
 };
 
 static struct platform_driver __refdata mic_platform_driver = {
diff --git a/kernel/drivers/misc/mic/card/mic_x100.h b/kernel/drivers/misc/mic/card/mic_x100.h
index d66ea5563..7e2224934 100644
--- a/kernel/drivers/misc/mic/card/mic_x100.h
+++ b/kernel/drivers/misc/mic/card/mic_x100.h
@@ -35,6 +35,7 @@
 #define MIC_X100_SBOX_SDBIC0 0x0000CC90
 #define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000
 #define MIC_X100_SBOX_RDMASR0	0x0000B180
+#define MIC_X100_SBOX_APICICR0 0x0000A9D0
 
 #define MIC_X100_MAX_DOORBELL_IDX 8
 
diff --git a/kernel/drivers/misc/mic/common/mic_dev.h b/kernel/drivers/misc/mic/common/mic_dev.h
index 92999c2bb..50776772e 100644
--- a/kernel/drivers/misc/mic/common/mic_dev.h
+++ b/kernel/drivers/misc/mic/common/mic_dev.h
@@ -21,6 +21,19 @@
 #ifndef __MIC_DEV_H__
 #define __MIC_DEV_H__
 
+/* The maximum number of MIC devices supported in a single host system. */
+#define MIC_MAX_NUM_DEVS 128
+
+/**
+ * enum mic_hw_family - The hardware family to which a device belongs.
+ */
+enum mic_hw_family {
+	MIC_FAMILY_X100 = 0,
+	MIC_FAMILY_X200,
+	MIC_FAMILY_UNKNOWN,
+	MIC_FAMILY_LAST
+};
+
 /**
  * struct mic_mw - MIC memory window
  *
@@ -48,4 +61,7 @@ struct mic_mw {
 #define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
 #define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
 
+/* Maximum number of DMA channels */
+#define MIC_MAX_DMA_CHAN 4
+
 #endif
diff --git a/kernel/drivers/misc/mic/cosm/Makefile b/kernel/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 000000000..b85d4d49d
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += mic_cosm.o
+
+mic_cosm-objs := cosm_main.o
+mic_cosm-objs += cosm_debugfs.o
+mic_cosm-objs += cosm_sysfs.o
+mic_cosm-objs += cosm_scif_server.o
diff --git a/kernel/drivers/misc/mic/cosm/cosm_debugfs.c b/kernel/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 000000000..216cb3cd2
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "cosm_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *cosm_dbg;
+
+/**
+ * cosm_log_buf_show - Display MIC kernel log buffer
+ *
+ * log_buf addr/len is read from System.map by user space
+ * and populated in sysfs entries.
+ */
+static int cosm_log_buf_show(struct seq_file *s, void *unused)
+{
+	void __iomem *log_buf_va;
+	int __iomem *log_buf_len_va;
+	struct cosm_device *cdev = s->private;
+	void *kva;
+	int size;
+	u64 aper_offset;
+
+	if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
+		goto done;
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	case MIC_BOOTING:
+	case MIC_ONLINE:
+	case MIC_SHUTTING_DOWN:
+		break;
+	default:
+		goto unlock;
+	}
+
+	/*
+	 * Card kernel will never be relocated and any kernel text/data mapping
+	 * can be translated to phys address by subtracting __START_KERNEL_map.
+	 */
+	aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
+	log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+	aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
+	log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
+
+	size = ioread32(log_buf_len_va);
+	kva = kmalloc(size, GFP_KERNEL);
+	if (!kva)
+		goto unlock;
+
+	memcpy_fromio(kva, log_buf_va, size);
+	seq_write(s, kva, size);
+	kfree(kva);
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+done:
+	return 0;
+}
+
+static int cosm_log_buf_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_log_buf_show, inode->i_private);
+}
+
+static const struct file_operations log_buf_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_log_buf_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+/**
+ * cosm_force_reset_show - Force MIC reset
+ *
+ * Invokes the force_reset COSM bus op instead of the standard reset
+ * op in case a force reset of the MIC device is required
+ */
+static int cosm_force_reset_show(struct seq_file *s, void *pos)
+{
+	struct cosm_device *cdev = s->private;
+
+	cosm_stop(cdev, true);
+	return 0;
+}
+
+static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cosm_force_reset_show, inode->i_private);
+}
+
+static const struct file_operations force_reset_ops = {
+	.owner   = THIS_MODULE,
+	.open    = cosm_force_reset_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release
+};
+
+void cosm_create_debug_dir(struct cosm_device *cdev)
+{
+	char name[16];
+
+	if (!cosm_dbg)
+		return;
+
+	scnprintf(name, sizeof(name), "mic%d", cdev->index);
+	cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
+	debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
+			    &force_reset_ops);
+}
+
+void cosm_delete_debug_dir(struct cosm_device *cdev)
+{
+	if (!cdev->dbg_dir)
+		return;
+
+	debugfs_remove_recursive(cdev->dbg_dir);
+}
+
+void cosm_init_debugfs(void)
+{
+	cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!cosm_dbg)
+		pr_err("can't create debugfs dir\n");
+}
+
+void cosm_exit_debugfs(void)
+{
+	debugfs_remove(cosm_dbg);
+}
diff --git a/kernel/drivers/misc/mic/cosm/cosm_main.c b/kernel/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 000000000..4b4b356c7
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,388 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include "cosm_main.h"
+
+static const char cosm_driver_name[] = "mic";
+
+/* COSM ID allocator */
+static struct ida g_cosm_ida;
+/* Class of MIC devices for sysfs accessibility. */
+static struct class *g_cosm_class;
+/* Number of MIC devices */
+static atomic_t g_num_dev;
+
+/**
+ * cosm_hw_reset - Issue a HW reset for the MIC device
+ * @cdev: pointer to cosm_device instance
+ */
+static void cosm_hw_reset(struct cosm_device *cdev, bool force)
+{
+	int i;
+
+#define MIC_RESET_TO (45)
+	if (force && cdev->hw_ops->force_reset)
+		cdev->hw_ops->force_reset(cdev);
+	else
+		cdev->hw_ops->reset(cdev);
+
+	for (i = 0; i < MIC_RESET_TO; i++) {
+		if (cdev->hw_ops->ready(cdev)) {
+			cosm_set_state(cdev, MIC_READY);
+			return;
+		}
+		/*
+		 * Resets typically take 10s of seconds to complete.
+		 * Since an MMIO read is required to check if the
+		 * firmware is ready or not, a 1 second delay works nicely.
+		 */
+		msleep(1000);
+	}
+	cosm_set_state(cdev, MIC_RESET_FAILED);
+}
+
+/**
+ * cosm_start - Start the MIC
+ * @cdev: pointer to cosm_device instance
+ *
+ * This function prepares an MIC for boot and initiates boot.
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_start(struct cosm_device *cdev)
+{
+	const struct cred *orig_cred;
+	struct cred *override_cred;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (!cdev->bootmode) {
+		dev_err(&cdev->dev, "%s %d bootmode not set\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+retry:
+	if (cdev->state != MIC_READY) {
+		dev_err(&cdev->dev, "%s %d MIC state not READY\n",
+			__func__, __LINE__);
+		rc = -EINVAL;
+		goto unlock_ret;
+	}
+	if (!cdev->hw_ops->ready(cdev)) {
+		cosm_hw_reset(cdev, false);
+		/*
+		 * The state will either be MIC_READY if the reset succeeded
+		 * or MIC_RESET_FAILED if the firmware reset failed.
+		 */
+		goto retry;
+	}
+
+	/*
+	 * Set credentials to root to allow non-root user to download initramfs
+	 * with 600 permissions
+	 */
+	override_cred = prepare_creds();
+	if (!override_cred) {
+		dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
+			__func__, __LINE__);
+		rc = -ENOMEM;
+		goto unlock_ret;
+	}
+	override_cred->fsuid = GLOBAL_ROOT_UID;
+	orig_cred = override_creds(override_cred);
+
+	rc = cdev->hw_ops->start(cdev, cdev->index);
+
+	revert_creds(orig_cred);
+	put_cred(override_cred);
+	if (rc)
+		goto unlock_ret;
+
+	/*
+	 * If linux is being booted, card is treated 'online' only
+	 * when the scif interface in the card is up. If anything else
+	 * is booted, we set card to 'online' immediately.
+	 */
+	if (!strcmp(cdev->bootmode, "linux"))
+		cosm_set_state(cdev, MIC_BOOTING);
+	else
+		cosm_set_state(cdev, MIC_ONLINE);
+unlock_ret:
+	mutex_unlock(&cdev->cosm_mutex);
+	if (rc)
+		dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
+	return rc;
+}
+
+/**
+ * cosm_stop - Prepare the MIC for reset and trigger reset
+ * @cdev: pointer to cosm_device instance
+ * @force: force a MIC to reset even if it is already reset and ready.
+ *
+ * RETURNS: None
+ */
+void cosm_stop(struct cosm_device *cdev, bool force)
+{
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY || force) {
+		/*
+		 * Don't call hw_ops if they have been called previously.
+		 * stop(..) calls device_unregister and will crash the system if
+		 * called multiple times.
+		 */
+		bool call_hw_ops = cdev->state != MIC_RESET_FAILED &&
+					cdev->state != MIC_READY;
+
+		if (cdev->state != MIC_RESETTING)
+			cosm_set_state(cdev, MIC_RESETTING);
+		cdev->heartbeat_watchdog_enable = false;
+		if (call_hw_ops)
+			cdev->hw_ops->stop(cdev, force);
+		cosm_hw_reset(cdev, force);
+		cosm_set_shutdown_status(cdev, MIC_NOP);
+		if (call_hw_ops && cdev->hw_ops->post_reset)
+			cdev->hw_ops->post_reset(cdev, cdev->state);
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	flush_work(&cdev->scif_work);
+}
+
+/**
+ * cosm_reset_trigger_work - Trigger MIC reset
+ * @work: The work structure
+ *
+ * This work is scheduled whenever the host wants to reset the MIC.
+ */
+static void cosm_reset_trigger_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						reset_trigger_work);
+	cosm_stop(cdev, false);
+}
+
+/**
+ * cosm_reset - Schedule MIC reset
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An -EINVAL if the card is already READY or 0 for success.
+ */
+int cosm_reset(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_READY) {
+		cosm_set_state(cdev, MIC_RESETTING);
+		schedule_work(&cdev->reset_trigger_work);
+	} else {
+		dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
+		rc = -EINVAL;
+	}
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+/**
+ * cosm_shutdown - Initiate MIC shutdown.
+ * @cdev: pointer to cosm_device instance
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
+ */
+int cosm_shutdown(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
+	int rc = 0;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cdev->state != MIC_ONLINE) {
+		rc = -EINVAL;
+		dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
+			__func__, __LINE__, cosm_state_string[cdev->state]);
+		goto err;
+	}
+
+	if (!cdev->epd) {
+		rc = -ENOTCONN;
+		dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0) {
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+		goto err;
+	}
+	cdev->heartbeat_watchdog_enable = false;
+	cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+	rc = 0;
+err:
+	mutex_unlock(&cdev->cosm_mutex);
+	return rc;
+}
+
+static int cosm_driver_probe(struct cosm_device *cdev)
+{
+	int rc;
+
+	/* Initialize SCIF server at first probe */
+	if (atomic_add_return(1, &g_num_dev) == 1) {
+		rc = cosm_scif_init();
+		if (rc)
+			goto scif_exit;
+	}
+	mutex_init(&cdev->cosm_mutex);
+	INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
+	INIT_WORK(&cdev->scif_work, cosm_scif_work);
+	cdev->sysfs_heartbeat_enable = true;
+	cosm_sysfs_init(cdev);
+	cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
+			       MKDEV(0, cdev->index), cdev, cdev->attr_group,
+			       "mic%d", cdev->index);
+	if (IS_ERR(cdev->sdev)) {
+		rc = PTR_ERR(cdev->sdev);
+		dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
+			rc);
+		goto scif_exit;
+	}
+
+	cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
+		"state");
+	if (!cdev->state_sysfs) {
+		rc = -ENODEV;
+		dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
+		goto destroy_device;
+	}
+	cosm_create_debug_dir(cdev);
+	return 0;
+destroy_device:
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+scif_exit:
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+	return rc;
+}
+
+static void cosm_driver_remove(struct cosm_device *cdev)
+{
+	cosm_delete_debug_dir(cdev);
+	sysfs_put(cdev->state_sysfs);
+	device_destroy(g_cosm_class, MKDEV(0, cdev->index));
+	flush_work(&cdev->reset_trigger_work);
+	cosm_stop(cdev, false);
+	if (atomic_dec_and_test(&g_num_dev))
+		cosm_scif_exit();
+
+	/* These sysfs entries might have allocated */
+	kfree(cdev->cmdline);
+	kfree(cdev->firmware);
+	kfree(cdev->ramdisk);
+	kfree(cdev->bootmode);
+}
+
+static int cosm_suspend(struct device *dev)
+{
+	struct cosm_device *cdev = dev_to_cosm(dev);
+
+	mutex_lock(&cdev->cosm_mutex);
+	switch (cdev->state) {
+	/*
+	 * Suspend/freeze hooks in userspace have already shutdown the card.
+	 * Card should be 'ready' in most cases. It is however possible that
+	 * some userspace application initiated a boot. In those cases, we
+	 * simply reset the card.
+	 */
+	case MIC_ONLINE:
+	case MIC_BOOTING:
+	case MIC_SHUTTING_DOWN:
+		mutex_unlock(&cdev->cosm_mutex);
+		cosm_stop(cdev, false);
+		break;
+	default:
+		mutex_unlock(&cdev->cosm_mutex);
+		break;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops cosm_pm_ops = {
+	.suspend = cosm_suspend,
+	.freeze = cosm_suspend
+};
+
+static struct cosm_driver cosm_driver = {
+	.driver = {
+		.name =  KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+		.pm = &cosm_pm_ops,
+	},
+	.probe = cosm_driver_probe,
+	.remove = cosm_driver_remove
+};
+
+static int __init cosm_init(void)
+{
+	int ret;
+
+	cosm_init_debugfs();
+
+	g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
+	if (IS_ERR(g_cosm_class)) {
+		ret = PTR_ERR(g_cosm_class);
+		pr_err("class_create failed ret %d\n", ret);
+		goto cleanup_debugfs;
+	}
+
+	ida_init(&g_cosm_ida);
+	ret = cosm_register_driver(&cosm_driver);
+	if (ret) {
+		pr_err("cosm_register_driver failed ret %d\n", ret);
+		goto ida_destroy;
+	}
+	return 0;
+ida_destroy:
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+cleanup_debugfs:
+	cosm_exit_debugfs();
+	return ret;
+}
+
+static void __exit cosm_exit(void)
+{
+	cosm_unregister_driver(&cosm_driver);
+	ida_destroy(&g_cosm_ida);
+	class_destroy(g_cosm_class);
+	cosm_exit_debugfs();
+}
+
+module_init(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/mic/cosm/cosm_main.h b/kernel/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 000000000..f01156fca
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,70 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#ifndef _COSM_COSM_H_
+#define _COSM_COSM_H_
+
+#include <linux/scif.h>
+#include "../bus/cosm_bus.h"
+
+#define COSM_HEARTBEAT_SEND_SEC 30
+#define SCIF_COSM_LISTEN_PORT  201
+
+/**
+ * enum cosm_msg_id - IDs of the messages exchanged between host and card
+ * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
+ * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
+ * @COSM_MSG_HEARTBEAT: card->host heartbeat
+ * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
+ */
+enum cosm_msg_id {
+	COSM_MSG_SHUTDOWN,
+	COSM_MSG_SYNC_TIME,
+	COSM_MSG_HEARTBEAT,
+	COSM_MSG_SHUTDOWN_STATUS,
+};
+
+struct cosm_msg {
+	u64 id;				/* one of enum cosm_msg_id */
+	union {
+		u64 shutdown_status;	/* COSM_MSG_SHUTDOWN_STATUS payload */
+		struct timespec64 timespec; /* COSM_MSG_SYNC_TIME payload */
+	};
+};
+
+extern const char * const cosm_state_string[];
+extern const char * const cosm_shutdown_status_string[];
+
+void cosm_sysfs_init(struct cosm_device *cdev);
+int cosm_start(struct cosm_device *cdev);
+void cosm_stop(struct cosm_device *cdev, bool force);
+int cosm_reset(struct cosm_device *cdev);
+int cosm_shutdown(struct cosm_device *cdev);
+void cosm_set_state(struct cosm_device *cdev, u8 state);
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
+void cosm_init_debugfs(void);
+void cosm_exit_debugfs(void);
+void cosm_create_debug_dir(struct cosm_device *cdev);
+void cosm_delete_debug_dir(struct cosm_device *cdev);
+int cosm_scif_init(void);
+void cosm_scif_exit(void);
+void cosm_scif_work(struct work_struct *work);
+
+#endif
diff --git a/kernel/drivers/misc/mic/cosm/cosm_scif_server.c b/kernel/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 000000000..5696df432
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,405 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/kthread.h>
+#include "cosm_main.h"
+
+/*
+ * The COSM driver uses SCIF to communicate between the management node and the
+ * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
+ * receive a shutdown status back from the card upon completion of shutdown and
+ * (c) receive periodic heartbeat messages from the card used to deduce if the
+ * card has crashed.
+ *
+ * A COSM server consisting of a SCIF listening endpoint waits for incoming
+ * connections from the card. Upon acceptance of the connection, a separate
+ * work-item is scheduled to handle SCIF message processing for that card. The
+ * life-time of this work-item is therefore the time from which the connection
+ * from a card is accepted to the time at which the connection is closed. A new
+ * work-item starts each time the card boots and is alive till the card (a)
+ * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is
+ * unloaded.
+ *
+ * The COSM interactions with SCIF during card shutdown, reset and crash are
+ * as follows:
+ *
+ * Card shutdown
+ * -------------
+ * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
+ *    message from the host.
+ * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
+ *    in scif_remove(..) getting called on the card
+ * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
+ *    scif_peer_unregister_device -> device_unregister for the host peer device
+ * 4. During device_unregister remove(..) method of cosm_client is invoked which
+ *    closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
+ *    message being sent to host SCIF. SCIF_DISCNCT message processing on the
+ *    host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
+ *    up the host COSM thread blocked in scif_poll(..) resulting in
+ *    scif_poll(..)  returning POLLHUP.
+ * 5. On the card, scif_peer_release_dev is next called which results in an
+ *    SCIF_EXIT message being sent to the host and after receiving the
+ *    SCIF_EXIT_ACK from the host the peer device teardown on the card is
+ *    complete.
+ * 6. As part of the SCIF_EXIT message processing on the host, host sends a
+ *    SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
+ *    starts a similar SCIF peer device teardown sequence on the host
+ *    corresponding to the card being shut down.
+ *
+ * Card reset
+ * ----------
+ * The case of interest here is when the card has not been previously shut
+ * down, since most of the steps below are skipped if it already has been:
+ *
+ * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
+ *    which unregisters the SCIF HW device resulting in scif_remove(..) being
+ *    called on the host.
+ * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
+ *    SCIF_EXIT message being sent to the card.
+ * 3. The card executes scif_stop() as part of SCIF_EXIT message
+ *    processing. This results in the COSM endpoint on the card being closed and
+ *    the SCIF host peer device on the card getting unregistered similar to
+ *    steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
+ *    host returns POLLHUP as a result.
+ * 4. On the host, card peer device unregister and SCIF HW remove(..) also
+ *    subsequently complete.
+ *
+ * Card crash
+ * ----------
+ * If a reset is issued after the card has crashed, there is no SCIF_DISCNCT
+ * message from the card which would result in scif_poll(..) returning
+ * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
+ * message to itself resulting in the card SCIF peer device being unregistered,
+ * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
+ * scif_invalidate_ep call sequence which sets the endpoint state to
+ * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
+ */
+
+#define COSM_SCIF_BACKLOG 16
+#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
+#define COSM_HEARTBEAT_TIMEOUT_SEC \
+		(COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
+#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
+
+static struct task_struct *server_thread;
+static scif_epd_t listen_epd;
+
+/* Hand a pending internal shutdown status over to the sysfs-visible one */
+static void cosm_update_mic_status(struct cosm_device *cdev)
+{
+	if (cdev->shutdown_status_int == MIC_NOP)
+		return;
+	cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
+	cdev->shutdown_status_int = MIC_NOP;
+}
+
+/* Store MIC card's shutdown status internally when it is received */
+static void cosm_shutdown_status_int(struct cosm_device *cdev,
+				     enum mic_status shutdown_status)
+{
+	switch (shutdown_status) {
+	case MIC_HALTED:
+	case MIC_POWER_OFF:
+	case MIC_RESTART:
+	case MIC_CRASHED:
+		break;		/* the only statuses accepted here */
+	default:
+		dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
+			__func__, __LINE__, shutdown_status);
+		return;
+	}
+	cdev->shutdown_status_int = shutdown_status;
+	cdev->heartbeat_watchdog_enable = false;
+
+	if (cdev->state != MIC_SHUTTING_DOWN)
+		cosm_set_state(cdev, MIC_SHUTTING_DOWN);
+}
+
+/* Non-blocking recv. Read and process all available messages */
+static void cosm_scif_recv(struct cosm_device *cdev)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			break;
+		} else if (rc < 0) {
+			dev_dbg(&cdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			break;
+		}
+		dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SHUTDOWN_STATUS:
+			cosm_shutdown_status_int(cdev, msg.shutdown_status);
+			break;
+		case COSM_MSG_HEARTBEAT:
+			/* Nothing to do, heartbeat only unblocks scif_poll */
+			break;
+		default:	/* msg.id is a u64, so print it unsigned */
+			dev_err(&cdev->dev, "%s: %d unknown msg.id %llu\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Publish crashed status for this MIC card */
+static void cosm_set_crashed(struct cosm_device *cdev)
+{
+	dev_err(&cdev->dev, "node alive timeout\n");
+	cosm_shutdown_status_int(cdev, MIC_CRASHED);
+	cosm_update_mic_status(cdev);
+}
+
+/* Send host time to the MIC card to sync system time between host and MIC */
+static void cosm_send_time(struct cosm_device *cdev)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
+	int rc;
+
+	getnstimeofday64(&msg.timespec);
+	rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/*
+ * Close this cosm_device's endpoint after its peer endpoint on the card has
+ * been closed. In all cases except MIC card crash POLLHUP on the host is
+ * triggered by the client's endpoint being closed.
+ */
+static void cosm_scif_close(struct cosm_device *cdev)
+{
+	/*
+	 * Because SHUTDOWN_STATUS message is sent by the MIC cards in the
+	 * reboot notifier when shutdown is still not complete, we notify mpssd
+	 * to reset the card when SCIF endpoint is closed.
+	 */
+	cosm_update_mic_status(cdev);
+	scif_close(cdev->epd);
+	cdev->epd = NULL;
+	dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+}
+
+/*
+ * Set card state to ONLINE when a new SCIF connection from a MIC card is
+ * received. Normally the state is BOOTING when the connection comes in, but can
+ * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
+ */
+static int cosm_set_online(struct cosm_device *cdev)
+{
+	int rc = 0;
+
+	if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
+		cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
+		cdev->epd = cdev->newepd;
+		if (cdev->state == MIC_BOOTING)
+			cosm_set_state(cdev, MIC_ONLINE);
+		cosm_send_time(cdev);
+		dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
+	} else {
+		dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
+			 __func__, __LINE__, cosm_state_string[cdev->state]);
+		rc = -EINVAL;
+	}
+	/* Drop reference acquired by bus_find_device in the server thread */
+	put_device(&cdev->dev);
+	return rc;
+}
+
+/*
+ * Work function for handling work for a SCIF connection from a particular MIC
+ * card. It first sets the card state to ONLINE and then calls scif_poll to
+ * block on activity such as incoming messages on the SCIF endpoint. When the
+ * endpoint is closed, the work function exits, completing its life cycle, from
+ * MIC card boot to card shutdown/reset/crash.
+ */
+void cosm_scif_work(struct work_struct *work)
+{
+	struct cosm_device *cdev = container_of(work, struct cosm_device,
+						scif_work);
+	struct scif_pollepd pollepd;
+	int rc;
+
+	mutex_lock(&cdev->cosm_mutex);
+	if (cosm_set_online(cdev))
+		goto exit;
+
+	while (1) {
+		pollepd.epd = cdev->epd;
+		pollepd.events = POLLIN;
+
+		/* Drop the mutex before blocking in scif_poll(..) */
+		mutex_unlock(&cdev->cosm_mutex);
+		/* poll(..) with timeout on our endpoint */
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
+		mutex_lock(&cdev->cosm_mutex);
+		if (rc < 0) {
+			dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
+				__func__, __LINE__, rc);
+			continue;
+		}
+
+		/* There is a message from the card */
+		if (pollepd.revents & POLLIN)
+			cosm_scif_recv(cdev);
+
+		/* The peer endpoint is closed or this endpoint disconnected */
+		if (pollepd.revents & POLLHUP) {
+			cosm_scif_close(cdev);
+			break;
+		}
+
+		/* Did we timeout from poll? */
+		if (!rc && cdev->heartbeat_watchdog_enable)
+			cosm_set_crashed(cdev);
+	}
+exit:
+	dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
+	mutex_unlock(&cdev->cosm_mutex);
+}
+
+/*
+ * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
+ * cards, finds the correct cosm_device to associate that connection with and
+ * schedules individual work items for each MIC card.
+ */
+static int cosm_scif_server(void *unused)
+{
+	struct cosm_device *cdev;
+	scif_epd_t newepd;
+	struct scif_port_id port_id;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		rc = scif_accept(listen_epd, &port_id, &newepd,
+				 SCIF_ACCEPT_SYNC);
+		if (rc < 0) {
+			if (-ERESTARTSYS != rc)
+				pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
+			continue;
+		}
+
+		/* COSM device ID == SCIF node ID - 1 */
+		cdev = cosm_find_cdev_by_id(port_id.node - 1);
+		if (!cdev) {
+			/* Unknown node: close the endpoint, don't leak it */
+			scif_close(newepd);
+			continue;
+		}
+		cdev->newepd = newepd;
+		schedule_work(&cdev->scif_work);
+	}
+
+	pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
+	return 0;
+}
+
+static int cosm_scif_listen(void)
+{
+	int rc;
+
+	listen_epd = scif_open();
+	if (!listen_epd) {
+		pr_err("%s %d scif_open failed\n", __func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
+	if (rc < 0) {
+		pr_err("%s %d scif_bind failed rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
+	if (rc < 0) {
+		pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
+		goto err;
+	}
+	pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
+	return 0;
+err:
+	scif_close(listen_epd);
+	listen_epd = NULL;
+	return rc;
+}
+
+static void cosm_scif_listen_exit(void)
+{
+	pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
+	if (!listen_epd)
+		return;
+	scif_close(listen_epd);
+	listen_epd = NULL;
+}
+
+/*
+ * Create a listening SCIF endpoint and a server kthread which accepts incoming
+ * SCIF connections from MIC cards
+ */
+int cosm_scif_init(void)
+{
+	int rc = cosm_scif_listen();
+
+	if (rc) {
+		pr_err("%s %d cosm_scif_listen rc %d\n",
+		       __func__, __LINE__, rc);
+		goto err;
+	}
+
+	server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
+	if (IS_ERR(server_thread)) {
+		rc = PTR_ERR(server_thread);
+		pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
+		goto listen_exit;
+	}
+	return 0;
+listen_exit:
+	cosm_scif_listen_exit();
+err:
+	return rc;
+}
+
+/* Stop the running server thread and close the listening SCIF endpoint */
+void cosm_scif_exit(void)
+{
+	int rc;
+
+	if (!IS_ERR_OR_NULL(server_thread)) {
+		rc = send_sig(SIGKILL, server_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(server_thread);
+	}
+
+	cosm_scif_listen_exit();
+}
diff --git a/kernel/drivers/misc/mic/cosm/cosm_sysfs.c b/kernel/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 000000000..29d6863b6
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC Coprocessor State Management (COSM) Driver
+ *
+ */
+#include <linux/slab.h>
+#include "cosm_main.h"
+
+/*
+ * A state-to-string lookup table, for exposing a human readable state
+ * via sysfs. Always keep in sync with enum cosm_states
+ */
+const char * const cosm_state_string[] = {
+	[MIC_READY] = "ready",
+	[MIC_BOOTING] = "booting",
+	[MIC_ONLINE] = "online",
+	[MIC_SHUTTING_DOWN] = "shutting_down",
+	[MIC_RESETTING] = "resetting",
+	[MIC_RESET_FAILED] = "reset_failed",
+};
+
+/*
+ * A shutdown-status-to-string lookup table, for exposing a human
+ * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status
+ */
+const char * const cosm_shutdown_status_string[] = {
+	[MIC_NOP] = "nop",
+	[MIC_CRASHED] = "crashed",
+	[MIC_HALTED] = "halted",
+	[MIC_POWER_OFF] = "poweroff",
+	[MIC_RESTART] = "restart",
+};
+
+void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
+{
+	dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status],
+		cosm_shutdown_status_string[shutdown_status]);
+	cdev->shutdown_status = shutdown_status;
+}
+
+void cosm_set_state(struct cosm_device *cdev, u8 state)
+{
+	dev_dbg(&cdev->dev, "State %s -> %s\n",
+		cosm_state_string[cdev->state],
+		cosm_state_string[state]);
+	cdev->state = state;
+	sysfs_notify_dirent(cdev->state_sysfs);
+}
+
+/* sysfs "family" (read-only): output is produced by the hw_ops->family hook */
+static ssize_t
+family_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cosm = dev_get_drvdata(dev);
+
+	if (cosm)
+		return cosm->hw_ops->family(cosm, buf);
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(family);
+
+/* sysfs "stepping" (read-only): output comes from the hw_ops->stepping hook */
+static ssize_t
+stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cosm = dev_get_drvdata(dev);
+
+	if (cosm)
+		return cosm->hw_ops->stepping(cosm, buf);
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(stepping);
+
+static ssize_t
+state_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->state >= MIC_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_state_string[cdev->state]);
+}
+
+/*
+ * sysfs "state" store handler: "boot", "reset" and "shutdown" invoke
+ * cosm_start(), cosm_reset() and cosm_shutdown() respectively, anything
+ * else is rejected with -EINVAL. Returns count when the operation
+ * returned 0, otherwise the operation's return code.
+ */
+static ssize_t
+state_store(struct device *dev, struct device_attribute *attr,
+	    const char *buf, size_t count)
+{
+	struct cosm_device *cosm = dev_get_drvdata(dev);
+	int err;
+
+	if (!cosm)
+		return -EINVAL;
+
+	if (sysfs_streq(buf, "boot"))
+		err = cosm_start(cosm);
+	else if (sysfs_streq(buf, "reset"))
+		err = cosm_reset(cosm);
+	else if (sysfs_streq(buf, "shutdown"))
+		err = cosm_shutdown(cosm);
+	else
+		err = -EINVAL;
+
+	if (err)
+		count = err;
+	return count;
+}
+static DEVICE_ATTR_RW(state);
+
+static ssize_t shutdown_status_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+		cosm_shutdown_status_string[cdev->shutdown_status]);
+}
+static DEVICE_ATTR_RO(shutdown_status);
+
+static ssize_t
+heartbeat_enable_show(struct device *dev,
+		      struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
+}
+
+static ssize_t
+heartbeat_enable_store(struct device *dev,
+		       struct device_attribute *attr,
+		       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int enable;
+	int ret;
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	ret = kstrtoint(buf, 10, &enable);
+	if (ret)
+		goto unlock;
+
+	cdev->sysfs_heartbeat_enable = enable;
+	/* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
+	if (cdev->state == MIC_ONLINE)
+		cdev->heartbeat_watchdog_enable = enable;
+	ret = count;
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return ret;
+}
+static DEVICE_ATTR_RW(heartbeat_enable);
+
+static ssize_t
+cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *cmdline;
+
+	if (!cdev)
+		return -EINVAL;
+
+	cmdline = cdev->cmdline;
+
+	if (cmdline)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
+	return 0;
+}
+
+static ssize_t
+cmdline_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->cmdline);
+
+	cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->cmdline) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->cmdline, buf, count);
+
+	if (cdev->cmdline[count - 1] == '\n')
+		cdev->cmdline[count - 1] = '\0';
+	else
+		cdev->cmdline[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(cmdline);
+
+static ssize_t
+firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *firmware;
+
+	if (!cdev)
+		return -EINVAL;
+
+	firmware = cdev->firmware;
+
+	if (firmware)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
+	return 0;
+}
+
+static ssize_t
+firmware_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->firmware);
+
+	cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->firmware) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+	strncpy(cdev->firmware, buf, count);
+
+	if (cdev->firmware[count - 1] == '\n')
+		cdev->firmware[count - 1] = '\0';
+	else
+		cdev->firmware[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(firmware);
+
+static ssize_t
+ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *ramdisk;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ramdisk = cdev->ramdisk;
+
+	if (ramdisk)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
+	return 0;
+}
+
+static ssize_t
+ramdisk_store(struct device *dev, struct device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->ramdisk);
+
+	cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->ramdisk) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->ramdisk, buf, count);
+
+	if (cdev->ramdisk[count - 1] == '\n')
+		cdev->ramdisk[count - 1] = '\0';
+	else
+		cdev->ramdisk[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(ramdisk);
+
+static ssize_t
+bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	char *bootmode;
+
+	if (!cdev)
+		return -EINVAL;
+
+	bootmode = cdev->bootmode;
+
+	if (bootmode)
+		return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
+	return 0;
+}
+
+static ssize_t
+bootmode_store(struct device *dev, struct device_attribute *attr,
+	       const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
+		return -EINVAL;
+
+	mutex_lock(&cdev->cosm_mutex);
+	kfree(cdev->bootmode);
+
+	cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
+	if (!cdev->bootmode) {
+		count = -ENOMEM;
+		goto unlock;
+	}
+
+	strncpy(cdev->bootmode, buf, count);
+
+	if (cdev->bootmode[count - 1] == '\n')
+		cdev->bootmode[count - 1] = '\0';
+	else
+		cdev->bootmode[count] = '\0';
+unlock:
+	mutex_unlock(&cdev->cosm_mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(bootmode);
+
+static ssize_t
+log_buf_addr_show(struct device *dev, struct device_attribute *attr,
+		  char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
+}
+
+static ssize_t
+log_buf_addr_store(struct device *dev, struct device_attribute *attr,
+		   const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_addr = (void *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_addr);
+
+static ssize_t
+log_buf_len_show(struct device *dev, struct device_attribute *attr,
+		 char *buf)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+
+	if (!cdev)
+		return -EINVAL;
+
+	return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
+}
+
+static ssize_t
+log_buf_len_store(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct cosm_device *cdev = dev_get_drvdata(dev);
+	int ret;
+	unsigned long addr;
+
+	if (!cdev)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 16, &addr);
+	if (ret)
+		goto exit;
+
+	cdev->log_buf_len = (int *)addr;
+	ret = count;
+exit:
+	return ret;
+}
+static DEVICE_ATTR_RW(log_buf_len);
+
+static struct attribute *cosm_default_attrs[] = {
+	&dev_attr_family.attr,
+	&dev_attr_stepping.attr,
+	&dev_attr_state.attr,
+	&dev_attr_shutdown_status.attr,
+	&dev_attr_heartbeat_enable.attr,
+	&dev_attr_cmdline.attr,
+	&dev_attr_firmware.attr,
+	&dev_attr_ramdisk.attr,
+	&dev_attr_bootmode.attr,
+	&dev_attr_log_buf_addr.attr,
+	&dev_attr_log_buf_len.attr,
+
+	NULL
+};
+
+ATTRIBUTE_GROUPS(cosm_default);
+
+void cosm_sysfs_init(struct cosm_device *cdev)
+{
+	cdev->attr_group = cosm_default_groups;
+}
diff --git a/kernel/drivers/misc/mic/cosm_client/Makefile b/kernel/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 000000000..6f751a519
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile - Intel MIC COSM Client Driver
+# Copyright(c) 2015, Intel Corporation.
+#
+obj-$(CONFIG_MIC_COSM) += cosm_client.o
+
+cosm_client-objs += cosm_scif_client.o
diff --git a/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c b/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 000000000..03e98bf1a
--- /dev/null
+++ b/kernel/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,275 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Client Driver
+ *
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kthread.h>
+#include "../cosm/cosm_main.h"
+
+#define COSM_SCIF_MAX_RETRIES 10
+#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
+
+static struct task_struct *client_thread;
+static scif_epd_t client_epd;
+static struct scif_peer_dev *client_spdev;
+
+/*
+ * Reboot notifier: receives shutdown status from the OS and communicates it
+ * back to the COSM process on the host
+ */
+static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
+			     void *ptr)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
+	int rc;
+
+	event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
+	dev_info(&client_spdev->dev, "%s %d received event %lu\n",
+		 __func__, __LINE__, event);
+
+	msg.shutdown_status = event;
+	rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+	if (rc < 0)
+		dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+			__func__, __LINE__, rc);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cosm_reboot = {
+	.notifier_call  = cosm_reboot_event,
+};
+
+/* Set system time from timespec value received from the host */
+static void cosm_set_time(struct cosm_msg *msg)
+{
+	int rc = do_settimeofday64(&msg->timespec);
+
+	if (rc)
+		dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
+			__func__, __LINE__, rc);
+}
+
+/* COSM client receive message processing */
+static void cosm_client_recv(void)
+{
+	struct cosm_msg msg;
+	int rc;
+
+	while (1) {
+		rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
+		if (!rc) {
+			return;
+		} else if (rc < 0) {
+			dev_err(&client_spdev->dev, "%s: %d rc %d\n",
+				__func__, __LINE__, rc);
+			return;
+		}
+
+		dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
+			__func__, __LINE__, rc, msg.id);
+
+		switch (msg.id) {
+		case COSM_MSG_SYNC_TIME:
+			cosm_set_time(&msg);
+			break;
+		case COSM_MSG_SHUTDOWN:
+			orderly_poweroff(true);
+			break;
+		default:	/* msg.id is a u64, so print it unsigned */
+			dev_err(&client_spdev->dev, "%s: %d unknown id %llu\n",
+				__func__, __LINE__, msg.id);
+			break;
+		}
+	}
+}
+
+/* Initiate connection to the COSM server on the host */
+static int cosm_scif_connect(void)
+{
+	struct scif_port_id port_id;
+	int i, rc;
+
+	client_epd = scif_open();
+	if (!client_epd) {
+		dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	port_id.node = 0;
+	port_id.port = SCIF_COSM_LISTEN_PORT;
+
+	for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
+		rc = scif_connect(client_epd, &port_id);
+		if (rc < 0)
+			msleep(1000);
+		else
+			break;
+	}
+
+	if (rc < 0) {
+		dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
+			__func__, __LINE__, rc);
+		scif_close(client_epd);
+		client_epd = NULL;
+	}
+	return rc < 0 ? rc : 0;
+}
+
+/* Close the SCIF endpoint connected to the host, if one is open */
+static void cosm_scif_connect_exit(void)
+{
+	if (!client_epd)
+		return;
+	scif_close(client_epd);
+	client_epd = NULL;
+}
+
+/*
+ * COSM SCIF client thread function: waits for messages from the host and sends
+ * a heartbeat to the host
+ */
+static int cosm_scif_client(void *unused)
+{
+	struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
+	struct scif_pollepd pollepd;
+	int rc;
+
+	allow_signal(SIGKILL);
+
+	while (!kthread_should_stop()) {
+		pollepd.epd = client_epd;
+		pollepd.events = POLLIN;
+
+		rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
+		if (rc < 0) {
+			if (-EINTR != rc)
+				dev_err(&client_spdev->dev,
+					"%s %d scif_poll rc %d\n",
+					__func__, __LINE__, rc);
+			continue;
+		}
+
+		if (pollepd.revents & POLLIN)
+			cosm_client_recv();
+
+		msg.id = COSM_MSG_HEARTBEAT;
+		rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
+		if (rc < 0)
+			dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
+				__func__, __LINE__, rc);
+	}
+
+	dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
+		__func__, __LINE__);
+	return 0;
+}
+
+static void cosm_scif_probe(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	/* We are only interested in the host with spdev->dnode == 0 */
+	if (spdev->dnode)
+		return;
+
+	client_spdev = spdev;
+	rc = cosm_scif_connect();
+	if (rc)
+		goto exit;
+
+	rc = register_reboot_notifier(&cosm_reboot);
+	if (rc) {
+		dev_err(&spdev->dev,
+			"reboot notifier registration failed rc %d\n", rc);
+		goto connect_exit;
+	}
+
+	client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
+	if (IS_ERR(client_thread)) {
+		rc = PTR_ERR(client_thread);
+		dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
+			__func__, __LINE__, rc);
+		goto unreg_reboot;
+	}
+	return;
+unreg_reboot:
+	unregister_reboot_notifier(&cosm_reboot);
+connect_exit:
+	cosm_scif_connect_exit();
+exit:
+	client_spdev = NULL;
+}
+
+static void cosm_scif_remove(struct scif_peer_dev *spdev)
+{
+	int rc;
+
+	dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
+		__func__, __LINE__, spdev->dnode);
+
+	if (spdev->dnode)
+		return;
+
+	if (!IS_ERR_OR_NULL(client_thread)) {
+		rc = send_sig(SIGKILL, client_thread, 0);
+		if (rc) {
+			pr_err("%s %d send_sig rc %d\n",
+			       __func__, __LINE__, rc);
+			return;
+		}
+		kthread_stop(client_thread);
+	}
+	unregister_reboot_notifier(&cosm_reboot);
+	cosm_scif_connect_exit();
+	client_spdev = NULL;
+}
+
+static struct scif_client scif_client_cosm = {
+	.name = KBUILD_MODNAME,
+	.probe = cosm_scif_probe,
+	.remove = cosm_scif_remove,
+};
+
+static int __init cosm_client_init(void)
+{
+	int rc = scif_client_register(&scif_client_cosm);
+
+	if (rc)
+		pr_err("scif_client_register failed rc %d\n", rc);
+	return rc;
+}
+
+static void __exit cosm_client_exit(void)
+{
+	scif_client_unregister(&scif_client_cosm);
+}
+
+module_init(cosm_client_init);
+module_exit(cosm_client_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/mic/host/Makefile b/kernel/drivers/misc/mic/host/Makefile
index c2197f999..004d3db0f 100644
--- a/kernel/drivers/misc/mic/host/Makefile
+++ b/kernel/drivers/misc/mic/host/Makefile
@@ -5,7 +5,6 @@
 obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
 mic_host-objs := mic_main.o
 mic_host-objs += mic_x100.o
-mic_host-objs += mic_sysfs.o
 mic_host-objs += mic_smpt.o
 mic_host-objs += mic_intr.o
 mic_host-objs += mic_boot.o
diff --git a/kernel/drivers/misc/mic/host/mic_boot.c b/kernel/drivers/misc/mic/host/mic_boot.c
index d9fa609da..7845564df 100644
--- a/kernel/drivers/misc/mic/host/mic_boot.c
+++ b/kernel/drivers/misc/mic/host/mic_boot.c
@@ -21,14 +21,197 @@
 #include <linux/delay.h>
 #include <linux/firmware.h>
 #include <linux/pci.h>
-
+#include <linux/kmod.h>
 #include <linux/mic_common.h>
 #include <linux/mic_bus.h>
+#include "../bus/scif_bus.h"
 #include "../common/mic_dev.h"
 #include "mic_device.h"
 #include "mic_smpt.h"
 #include "mic_virtio.h"
 
+static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev)
+{
+	return dev_get_drvdata(scdev->dev.parent);
+}
+
+static void *__mic_dma_alloc(struct device *dev, size_t size,
+			     dma_addr_t *dma_handle, gfp_t gfp,
+			     struct dma_attrs *attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	dma_addr_t tmp;
+	void *va = kmalloc(size, gfp);
+
+	if (va) {
+		tmp = mic_map_single(mdev, va, size);
+		if (dma_mapping_error(dev, tmp)) {
+			kfree(va);
+			va = NULL;
+		} else {
+			*dma_handle = tmp;
+		}
+	}
+	return va;
+}
+
+static void __mic_dma_free(struct device *dev, size_t size, void *vaddr,
+			   dma_addr_t dma_handle, struct dma_attrs *attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_handle, size);
+	kfree(vaddr);
+}
+
+static dma_addr_t
+__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+		   size_t size, enum dma_data_direction dir,
+		   struct dma_attrs *attrs)
+{
+	void *va = phys_to_virt(page_to_phys(page)) + offset;
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_map_single(mdev, va, size);
+}
+
+static void
+__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+		     size_t size, enum dma_data_direction dir,
+		     struct dma_attrs *attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mic_unmap_single(mdev, dma_addr, size);
+}
+
+static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			    int nents, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	int i, j, ret;
+	dma_addr_t da;
+
+	ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
+	if (ret <= 0)
+		return 0;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length);
+		if (!da)
+			goto err;
+		sg_dma_address(s) = da;
+	}
+	return nents;
+err:
+	for_each_sg(sg, s, i, j) {
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+	return 0;
+}
+
+static void __mic_dma_unmap_sg(struct device *dev,
+			       struct scatterlist *sg, int nents,
+			       enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
+{
+	struct scif_hw_dev *scdev = dev_get_drvdata(dev);
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+	struct scatterlist *s;
+	dma_addr_t da;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		da = mic_to_dma_addr(mdev, sg_dma_address(s));
+		mic_unmap(mdev, sg_dma_address(s), s->length);
+		sg_dma_address(s) = da;
+	}
+	dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
+}
+
+static struct dma_map_ops __mic_dma_ops = {
+	.alloc = __mic_dma_alloc,
+	.free = __mic_dma_free,
+	.map_page = __mic_dma_map_page,
+	.unmap_page = __mic_dma_unmap_page,
+	.map_sg = __mic_dma_map_sg,
+	.unmap_sg = __mic_dma_unmap_sg,
+};
+
+static struct mic_irq *
+___mic_request_irq(struct scif_hw_dev *scdev,
+		   irqreturn_t (*func)(int irq, void *data),
+				       const char *name,
+				       void *data, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_request_threaded_irq(mdev, func, NULL, name, data,
+					db, MIC_INTR_DB);
+}
+
+static void
+___mic_free_irq(struct scif_hw_dev *scdev,
+		struct mic_irq *cookie, void *data)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_free_irq(mdev, cookie, data);
+}
+
+static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->intr_workarounds(mdev);
+}
+
+static int ___mic_next_db(struct scif_hw_dev *scdev)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mic_next_db(mdev);
+}
+
+static void ___mic_send_intr(struct scif_hw_dev *scdev, int db)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	mdev->ops->send_intr(mdev, db);
+}
+
+static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev,
+				    phys_addr_t pa, size_t len)
+{
+	struct mic_device *mdev = scdev_to_mdev(scdev);
+
+	return mdev->aper.va + pa;
+}
+
+static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va)
+{
+	/* nothing to do */
+}
+
+static struct scif_hw_ops scif_hw_ops = {
+	.request_irq = ___mic_request_irq,
+	.free_irq = ___mic_free_irq,
+	.ack_interrupt = ___mic_ack_interrupt,
+	.next_db = ___mic_next_db,
+	.send_intr = ___mic_send_intr,
+	.ioremap = ___mic_ioremap,
+	.iounmap = ___mic_iounmap,
+};
+
 static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev)
 {
 	return dev_get_drvdata(mbdev->dev.parent);
@@ -87,295 +270,213 @@ static struct mbus_hw_ops mbus_hw_ops = {
 	.ack_interrupt = _mic_ack_interrupt,
 };
 
-/**
- * mic_reset - Reset the MIC device.
- * @mdev: pointer to mic_device instance
- */
-static void mic_reset(struct mic_device *mdev)
+/* Initialize the MIC bootparams */
+void mic_bootparam_init(struct mic_device *mdev)
 {
-	int i;
+	struct mic_bootparam *bootparam = mdev->dp;
+
+	bootparam->magic = cpu_to_le32(MIC_MAGIC);
+	bootparam->h2c_config_db = -1;
+	bootparam->node_id = mdev->id + 1;
+	bootparam->scif_host_dma_addr = 0x0;
+	bootparam->scif_card_dma_addr = 0x0;
+	bootparam->c2h_scif_db = -1;
+	bootparam->h2c_scif_db = -1;
+}
+
+static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
+{
+	return dev_get_drvdata(cdev->dev.parent);
+}
 
-#define MIC_RESET_TO (45)
+static void _mic_reset(struct cosm_device *cdev)
+{
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
 
-	reinit_completion(&mdev->reset_wait);
 	mdev->ops->reset_fw_ready(mdev);
 	mdev->ops->reset(mdev);
-
-	for (i = 0; i < MIC_RESET_TO; i++) {
-		if (mdev->ops->is_fw_ready(mdev))
-			goto done;
-		/*
-		 * Resets typically take 10s of seconds to complete.
-		 * Since an MMIO read is required to check if the
-		 * firmware is ready or not, a 1 second delay works nicely.
-		 */
-		msleep(1000);
-	}
-	mic_set_state(mdev, MIC_RESET_FAILED);
-done:
-	complete_all(&mdev->reset_wait);
 }
 
-/* Initialize the MIC bootparams */
-void mic_bootparam_init(struct mic_device *mdev)
+static bool _mic_ready(struct cosm_device *cdev)
 {
-	struct mic_bootparam *bootparam = mdev->dp;
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
 
-	bootparam->magic = cpu_to_le32(MIC_MAGIC);
-	bootparam->c2h_shutdown_db = mdev->shutdown_db;
-	bootparam->h2c_shutdown_db = -1;
-	bootparam->h2c_config_db = -1;
-	bootparam->shutdown_status = 0;
-	bootparam->shutdown_card = 0;
+	return mdev->ops->is_fw_ready(mdev);
+}
+
+/**
+ * mic_request_dma_chans - Request DMA channels
+ * @mdev: pointer to mic_device instance
+ *
+ * returns number of DMA channels acquired
+ */
+static int mic_request_dma_chans(struct mic_device *mdev)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	request_module("mic_x100_dma");
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	do {
+		chan = dma_request_channel(mask, mdev->ops->dma_filter,
+					   &mdev->pdev->dev);
+		if (chan) {
+			mdev->dma_ch[mdev->num_dma_ch++] = chan;
+			if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
+				break;
+		}
+	} while (chan);
+	dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
+	return mdev->num_dma_ch;
 }
 
 /**
- * mic_start - Start the MIC.
+ * mic_free_dma_chans - release DMA channels
  * @mdev: pointer to mic_device instance
- * @buf: buffer containing boot string including firmware/ramdisk path.
+ *
+ * returns none
+ */
+static void mic_free_dma_chans(struct mic_device *mdev)
+{
+	int i = 0;
+
+	for (i = 0; i < mdev->num_dma_ch; i++) {
+		dma_release_channel(mdev->dma_ch[i]);
+		mdev->dma_ch[i] = NULL;
+	}
+	mdev->num_dma_ch = 0;
+}
+
+/**
+ * _mic_start - Start the MIC.
+ * @cdev: pointer to cosm_device instance
+ * @id: MIC device id/index provided by COSM used in other drivers like SCIF
  *
  * This function prepares an MIC for boot and initiates boot.
  * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ *
+ * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
  */
-int mic_start(struct mic_device *mdev, const char *buf)
+static int _mic_start(struct cosm_device *cdev, int id)
 {
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
 	int rc;
-	mutex_lock(&mdev->mic_mutex);
-retry:
-	if (MIC_OFFLINE != mdev->state) {
-		rc = -EINVAL;
-		goto unlock_ret;
-	}
-	if (!mdev->ops->is_fw_ready(mdev)) {
-		mic_reset(mdev);
-		/*
-		 * The state will either be MIC_OFFLINE if the reset succeeded
-		 * or MIC_RESET_FAILED if the firmware reset failed.
-		 */
-		goto retry;
-	}
-	mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent,
+
+	mic_bootparam_init(mdev);
+	mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
 					       MBUS_DEV_DMA_HOST, &mic_dma_ops,
-					       &mbus_hw_ops, mdev->mmio.va);
+					       &mbus_hw_ops, id, mdev->mmio.va);
 	if (IS_ERR(mdev->dma_mbdev)) {
 		rc = PTR_ERR(mdev->dma_mbdev);
 		goto unlock_ret;
 	}
-	mdev->dma_ch = mic_request_dma_chan(mdev);
-	if (!mdev->dma_ch) {
-		rc = -ENXIO;
+	if (!mic_request_dma_chans(mdev)) {
+		rc = -ENODEV;
 		goto dma_remove;
 	}
-	rc = mdev->ops->load_mic_fw(mdev, buf);
+	mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
+					   &__mic_dma_ops, &scif_hw_ops,
+					   id + 1, 0, &mdev->mmio,
+					   &mdev->aper, mdev->dp, NULL,
+					   mdev->dma_ch, mdev->num_dma_ch,
+					   true);
+	if (IS_ERR(mdev->scdev)) {
+		rc = PTR_ERR(mdev->scdev);
+		goto dma_free;
+	}
+
+	rc = mdev->ops->load_mic_fw(mdev, NULL);
 	if (rc)
-		goto dma_release;
+		goto scif_remove;
 	mic_smpt_restore(mdev);
 	mic_intr_restore(mdev);
 	mdev->intr_ops->enable_interrupts(mdev);
 	mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
 	mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
 	mdev->ops->send_firmware_intr(mdev);
-	mic_set_state(mdev, MIC_ONLINE);
 	goto unlock_ret;
-dma_release:
-	dma_release_channel(mdev->dma_ch);
+scif_remove:
+	scif_unregister_device(mdev->scdev);
+dma_free:
+	mic_free_dma_chans(mdev);
 dma_remove:
 	mbus_unregister_device(mdev->dma_mbdev);
 unlock_ret:
-	mutex_unlock(&mdev->mic_mutex);
 	return rc;
 }
 
 /**
- * mic_stop - Prepare the MIC for reset and trigger reset.
- * @mdev: pointer to mic_device instance
+ * _mic_stop - Prepare the MIC for reset and trigger reset.
+ * @cdev: pointer to cosm_device instance
  * @force: force a MIC to reset even if it is already offline.
  *
  * RETURNS: None.
  */
-void mic_stop(struct mic_device *mdev, bool force)
+static void _mic_stop(struct cosm_device *cdev, bool force)
 {
-	mutex_lock(&mdev->mic_mutex);
-	if (MIC_OFFLINE != mdev->state || force) {
-		mic_virtio_reset_devices(mdev);
-		if (mdev->dma_ch) {
-			dma_release_channel(mdev->dma_ch);
-			mdev->dma_ch = NULL;
-		}
-		mbus_unregister_device(mdev->dma_mbdev);
-		mic_bootparam_init(mdev);
-		mic_reset(mdev);
-		if (MIC_RESET_FAILED == mdev->state)
-			goto unlock;
-		mic_set_shutdown_status(mdev, MIC_NOP);
-		if (MIC_SUSPENDED != mdev->state)
-			mic_set_state(mdev, MIC_OFFLINE);
-	}
-unlock:
-	mutex_unlock(&mdev->mic_mutex);
-}
-
-/**
- * mic_shutdown - Initiate MIC shutdown.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_shutdown(struct mic_device *mdev)
-{
-	struct mic_bootparam *bootparam = mdev->dp;
-	s8 db = bootparam->h2c_shutdown_db;
-
-	mutex_lock(&mdev->mic_mutex);
-	if (MIC_ONLINE == mdev->state && db != -1) {
-		bootparam->shutdown_card = 1;
-		mdev->ops->send_intr(mdev, db);
-		mic_set_state(mdev, MIC_SHUTTING_DOWN);
-	}
-	mutex_unlock(&mdev->mic_mutex);
-}
-
-/**
- * mic_shutdown_work - Handle shutdown interrupt from MIC.
- * @work: The work structure.
- *
- * This work is scheduled whenever the host has received a shutdown
- * interrupt from the MIC.
- */
-void mic_shutdown_work(struct work_struct *work)
-{
-	struct mic_device *mdev = container_of(work, struct mic_device,
-			shutdown_work);
-	struct mic_bootparam *bootparam = mdev->dp;
-
-	mutex_lock(&mdev->mic_mutex);
-	mic_set_shutdown_status(mdev, bootparam->shutdown_status);
-	bootparam->shutdown_status = 0;
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
 
 	/*
-	 * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not
-	 * change the state here so as to prevent users from booting the card
-	 * during and after the suspend operation.
+	 * Since SCIF handles card shutdown and reset (using COSM), it
+	 * will be the first to be registered and the last to be
+	 * unregistered.
 	 */
-	if (MIC_SHUTTING_DOWN != mdev->state &&
-	    MIC_SUSPENDED != mdev->state)
-		mic_set_state(mdev, MIC_SHUTTING_DOWN);
-	mutex_unlock(&mdev->mic_mutex);
+	mic_virtio_reset_devices(mdev);
+	scif_unregister_device(mdev->scdev);
+	mic_free_dma_chans(mdev);
+	mbus_unregister_device(mdev->dma_mbdev);
+	mic_bootparam_init(mdev);
 }
 
-/**
- * mic_reset_trigger_work - Trigger MIC reset.
- * @work: The work structure.
- *
- * This work is scheduled whenever the host wants to reset the MIC.
- */
-void mic_reset_trigger_work(struct work_struct *work)
+static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
 {
-	struct mic_device *mdev = container_of(work, struct mic_device,
-			reset_trigger_work);
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
 
-	mic_stop(mdev, false);
+	return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
 }
 
-/**
- * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate
- * event.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_complete_resume(struct mic_device *mdev)
+static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
 {
-	if (mdev->state != MIC_SUSPENDED) {
-		dev_warn(mdev->sdev->parent, "state %d should be %d\n",
-			 mdev->state, MIC_SUSPENDED);
-		return;
-	}
-
-	/* Make sure firmware is ready */
-	if (!mdev->ops->is_fw_ready(mdev))
-		mic_stop(mdev, true);
-
-	mutex_lock(&mdev->mic_mutex);
-	mic_set_state(mdev, MIC_OFFLINE);
-	mutex_unlock(&mdev->mic_mutex);
-}
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
+	const char *string = "??";
 
-/**
- * mic_prepare_suspend - Handle suspend notification for the MIC device.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_prepare_suspend(struct mic_device *mdev)
-{
-	unsigned long timeout;
-
-#define MIC_SUSPEND_TIMEOUT (60 * HZ)
-
-	mutex_lock(&mdev->mic_mutex);
-	switch (mdev->state) {
-	case MIC_OFFLINE:
-		/*
-		 * Card is already offline. Set state to MIC_SUSPENDED
-		 * to prevent users from booting the card.
-		 */
-		mic_set_state(mdev, MIC_SUSPENDED);
-		mutex_unlock(&mdev->mic_mutex);
+	switch (mdev->stepping) {
+	case MIC_A0_STEP:
+		string = "A0";
 		break;
-	case MIC_ONLINE:
-		/*
-		 * Card is online. Set state to MIC_SUSPENDING and notify
-		 * MIC user space daemon which will issue card
-		 * shutdown and reset.
-		 */
-		mic_set_state(mdev, MIC_SUSPENDING);
-		mutex_unlock(&mdev->mic_mutex);
-		timeout = wait_for_completion_timeout(&mdev->reset_wait,
-						      MIC_SUSPEND_TIMEOUT);
-		/* Force reset the card if the shutdown completion timed out */
-		if (!timeout) {
-			mutex_lock(&mdev->mic_mutex);
-			mic_set_state(mdev, MIC_SUSPENDED);
-			mutex_unlock(&mdev->mic_mutex);
-			mic_stop(mdev, true);
-		}
+	case MIC_B0_STEP:
+		string = "B0";
 		break;
-	case MIC_SHUTTING_DOWN:
-		/*
-		 * Card is shutting down. Set state to MIC_SUSPENDED
-		 * to prevent further boot of the card.
-		 */
-		mic_set_state(mdev, MIC_SUSPENDED);
-		mutex_unlock(&mdev->mic_mutex);
-		timeout = wait_for_completion_timeout(&mdev->reset_wait,
-						      MIC_SUSPEND_TIMEOUT);
-		/* Force reset the card if the shutdown completion timed out */
-		if (!timeout)
-			mic_stop(mdev, true);
+	case MIC_B1_STEP:
+		string = "B1";
+		break;
+	case MIC_C0_STEP:
+		string = "C0";
 		break;
 	default:
-		mutex_unlock(&mdev->mic_mutex);
 		break;
 	}
+	return scnprintf(buf, PAGE_SIZE, "%s\n", string);
 }
 
-/**
- * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown.
- * @mdev: pointer to mic_device instance
- *
- * RETURNS: None.
- */
-void mic_suspend(struct mic_device *mdev)
+static struct mic_mw *_mic_aper(struct cosm_device *cdev)
 {
-	struct mic_bootparam *bootparam = mdev->dp;
-	s8 db = bootparam->h2c_shutdown_db;
+	struct mic_device *mdev = cosmdev_to_mdev(cdev);
 
-	mutex_lock(&mdev->mic_mutex);
-	if (MIC_SUSPENDING == mdev->state && db != -1) {
-		bootparam->shutdown_card = 1;
-		mdev->ops->send_intr(mdev, db);
-		mic_set_state(mdev, MIC_SUSPENDED);
-	}
-	mutex_unlock(&mdev->mic_mutex);
+	return &mdev->aper;
 }
+
+struct cosm_hw_ops cosm_hw_ops = {
+	.reset = _mic_reset,
+	.force_reset = _mic_reset,
+	.post_reset = NULL,
+	.ready = _mic_ready,
+	.start = _mic_start,
+	.stop = _mic_stop,
+	.family = _mic_family,
+	.stepping = _mic_stepping,
+	.aper = _mic_aper,
+};
diff --git a/kernel/drivers/misc/mic/host/mic_debugfs.c b/kernel/drivers/misc/mic/host/mic_debugfs.c
index 687e9aacf..105816007 100644
--- a/kernel/drivers/misc/mic/host/mic_debugfs.c
+++ b/kernel/drivers/misc/mic/host/mic_debugfs.c
@@ -31,71 +31,6 @@
 /* Debugfs parent dir */
 static struct dentry *mic_dbg;
 
-/**
- * mic_log_buf_show - Display MIC kernel log buffer.
- *
- * log_buf addr/len is read from System.map by user space
- * and populated in sysfs entries.
- */
-static int mic_log_buf_show(struct seq_file *s, void *unused)
-{
-	void __iomem *log_buf_va;
-	int __iomem *log_buf_len_va;
-	struct mic_device *mdev = s->private;
-	void *kva;
-	int size;
-	unsigned long aper_offset;
-
-	if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len)
-		goto done;
-	/*
-	 * Card kernel will never be relocated and any kernel text/data mapping
-	 * can be translated to phys address by subtracting __START_KERNEL_map.
-	 */
-	aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map;
-	log_buf_len_va = mdev->aper.va + aper_offset;
-	aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map;
-	log_buf_va = mdev->aper.va + aper_offset;
-	size = ioread32(log_buf_len_va);
-
-	kva = kmalloc(size, GFP_KERNEL);
-	if (!kva)
-		goto done;
-	mutex_lock(&mdev->mic_mutex);
-	memcpy_fromio(kva, log_buf_va, size);
-	switch (mdev->state) {
-	case MIC_ONLINE:
-		/* Fall through */
-	case MIC_SHUTTING_DOWN:
-		seq_write(s, kva, size);
-		break;
-	default:
-		break;
-	}
-	mutex_unlock(&mdev->mic_mutex);
-	kfree(kva);
-done:
-	return 0;
-}
-
-static int mic_log_buf_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mic_log_buf_show, inode->i_private);
-}
-
-static int mic_log_buf_release(struct inode *inode, struct file *file)
-{
-	return single_release(inode, file);
-}
-
-static const struct file_operations log_buf_ops = {
-	.owner   = THIS_MODULE,
-	.open    = mic_log_buf_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = mic_log_buf_release
-};
-
 static int mic_smpt_show(struct seq_file *s, void *pos)
 {
 	int i;
@@ -138,32 +73,6 @@ static const struct file_operations smpt_file_ops = {
 	.release = mic_smpt_debug_release
 };
 
-static int mic_soft_reset_show(struct seq_file *s, void *pos)
-{
-	struct mic_device *mdev = s->private;
-
-	mic_stop(mdev, true);
-	return 0;
-}
-
-static int mic_soft_reset_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mic_soft_reset_show, inode->i_private);
-}
-
-static int mic_soft_reset_debug_release(struct inode *inode, struct file *file)
-{
-	return single_release(inode, file);
-}
-
-static const struct file_operations soft_reset_ops = {
-	.owner   = THIS_MODULE,
-	.open    = mic_soft_reset_debug_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = mic_soft_reset_debug_release
-};
-
 static int mic_post_code_show(struct seq_file *s, void *pos)
 {
 	struct mic_device *mdev = s->private;
@@ -204,16 +113,19 @@ static int mic_dp_show(struct seq_file *s, void *pos)
 
 	seq_printf(s, "Bootparam: magic 0x%x\n",
 		   bootparam->magic);
-	seq_printf(s, "Bootparam: h2c_shutdown_db %d\n",
-		   bootparam->h2c_shutdown_db);
 	seq_printf(s, "Bootparam: h2c_config_db %d\n",
 		   bootparam->h2c_config_db);
-	seq_printf(s, "Bootparam: c2h_shutdown_db %d\n",
-		   bootparam->c2h_shutdown_db);
-	seq_printf(s, "Bootparam: shutdown_status %d\n",
-		   bootparam->shutdown_status);
-	seq_printf(s, "Bootparam: shutdown_card %d\n",
-		   bootparam->shutdown_card);
+	seq_printf(s, "Bootparam: node_id %d\n",
+		   bootparam->node_id);
+	seq_printf(s, "Bootparam: c2h_scif_db %d\n",
+		   bootparam->c2h_scif_db);
+	seq_printf(s, "Bootparam: h2c_scif_db %d\n",
+		   bootparam->h2c_scif_db);
+	seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
+		   bootparam->scif_host_dma_addr);
+	seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
+		   bootparam->scif_card_dma_addr);
+
 
 	for (i = sizeof(*bootparam); i < MIC_DP_SIZE;
 	     i += mic_total_desc_size(d)) {
@@ -379,8 +291,7 @@ static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
 	int i, j;
 	u16 entry;
 	u16 vector;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 
 	if (pci_dev_msi_enabled(pdev)) {
 		for (i = 0; i < mdev->irq_info.num_vectors; i++) {
@@ -441,20 +352,18 @@ static const struct file_operations msi_irq_info_ops = {
  */
 void mic_create_debug_dir(struct mic_device *mdev)
 {
+	char name[16];
+
 	if (!mic_dbg)
 		return;
 
-	mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg);
+	scnprintf(name, sizeof(name), "mic%d", mdev->id);
+	mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
 	if (!mdev->dbg_dir)
 		return;
 
-	debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops);
-
 	debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
 
-	debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev,
-			    &soft_reset_ops);
-
 	debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
 			    &post_code_ops);
 
diff --git a/kernel/drivers/misc/mic/host/mic_device.h b/kernel/drivers/misc/mic/host/mic_device.h
index 016bd15a7..461184a12 100644
--- a/kernel/drivers/misc/mic/host/mic_device.h
+++ b/kernel/drivers/misc/mic/host/mic_device.h
@@ -26,21 +26,12 @@
 #include <linux/notifier.h>
 #include <linux/irqreturn.h>
 #include <linux/dmaengine.h>
+#include <linux/miscdevice.h>
 #include <linux/mic_bus.h>
-
+#include "../bus/scif_bus.h"
+#include "../bus/cosm_bus.h"
 #include "mic_intr.h"
 
-/* The maximum number of MIC devices supported in a single host system. */
-#define MIC_MAX_NUM_DEVS 256
-
-/**
- * enum mic_hw_family - The hardware family to which a device belongs.
- */
-enum mic_hw_family {
-	MIC_FAMILY_X100 = 0,
-	MIC_FAMILY_UNKNOWN
-};
-
 /**
  * enum mic_stepping - MIC stepping ids.
  */
@@ -51,6 +42,8 @@ enum mic_stepping {
 	MIC_C0_STEP = 0x20,
 };
 
+extern struct cosm_hw_ops cosm_hw_ops;
+
 /**
  * struct mic_device -  MIC device information for each card.
  *
@@ -60,8 +53,7 @@ enum mic_stepping {
  * @ops: MIC HW specific operations.
  * @id: The unique device id for this MIC device.
  * @stepping: Stepping ID.
- * @attr_group: Pointer to list of sysfs attribute groups.
- * @sdev: Device for sysfs entries.
+ * @pdev: Underlying PCI device.
  * @mic_mutex: Mutex for synchronizing access to mic_device.
  * @intr_ops: HW specific interrupt operations.
  * @smpt_ops: Hardware specific SMPT operations.
@@ -69,28 +61,17 @@ enum mic_stepping {
  * @intr_info: H/W specific interrupt information.
  * @irq_info: The OS specific irq information
  * @dbg_dir: debugfs directory of this MIC device.
- * @cmdline: Kernel command line.
- * @firmware: Firmware file name.
- * @ramdisk: Ramdisk file name.
- * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
  * @bootaddr: MIC boot address.
- * @reset_trigger_work: Work for triggering reset requests.
- * @shutdown_work: Work for handling shutdown interrupts.
- * @state: MIC state.
- * @shutdown_status: MIC status reported by card for shutdown/crashes.
- * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
- * @reset_wait: Waitqueue for sleeping while reset completes.
- * @log_buf_addr: Log buffer address for MIC.
- * @log_buf_len: Log buffer length address for MIC.
  * @dp: virtio device page
  * @dp_dma_addr: virtio device page DMA address.
- * @shutdown_db: shutdown doorbell.
- * @shutdown_cookie: shutdown cookie.
- * @cdev: Character device for MIC.
+ * @name: name for the misc char device
+ * @miscdev: registered misc char device
  * @vdev_list: list of virtio devices.
- * @pm_notifier: Handles PM notifications from the OS.
  * @dma_mbdev: MIC BUS DMA device.
- * @dma_ch: DMA channel reserved by this driver for use by virtio devices.
+ * @dma_ch: Array of DMA channels
+ * @num_dma_ch: Number of DMA channels available
+ * @scdev: SCIF device on the SCIF virtual bus.
+ * @cosm_dev: COSM device
  */
 struct mic_device {
 	struct mic_mw mmio;
@@ -99,8 +80,7 @@ struct mic_device {
 	struct mic_hw_ops *ops;
 	int id;
 	enum mic_stepping stepping;
-	const struct attribute_group **attr_group;
-	struct device *sdev;
+	struct pci_dev *pdev;
 	struct mutex mic_mutex;
 	struct mic_hw_intr_ops *intr_ops;
 	struct mic_smpt_ops *smpt_ops;
@@ -108,28 +88,17 @@ struct mic_device {
 	struct mic_intr_info *intr_info;
 	struct mic_irq_info irq_info;
 	struct dentry *dbg_dir;
-	char *cmdline;
-	char *firmware;
-	char *ramdisk;
-	char *bootmode;
 	u32 bootaddr;
-	struct work_struct reset_trigger_work;
-	struct work_struct shutdown_work;
-	u8 state;
-	u8 shutdown_status;
-	struct kernfs_node *state_sysfs;
-	struct completion reset_wait;
-	void *log_buf_addr;
-	int *log_buf_len;
 	void *dp;
 	dma_addr_t dp_dma_addr;
-	int shutdown_db;
-	struct mic_irq *shutdown_cookie;
-	struct cdev cdev;
+	char name[16];
+	struct miscdevice miscdev;
 	struct list_head vdev_list;
-	struct notifier_block pm_notifier;
 	struct mbus_device *dma_mbdev;
-	struct dma_chan *dma_ch;
+	struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
+	int num_dma_ch;
+	struct scif_hw_dev *scdev;
+	struct cosm_device *cosm_dev;
 };
 
 /**
@@ -195,37 +164,9 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
 	iowrite32(val, mw->va + offset);
 }
 
-static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev)
-{
-	dma_cap_mask_t mask;
-	struct dma_chan *chan;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
-	chan = dma_request_channel(mask, mdev->ops->dma_filter,
-				   mdev->sdev->parent);
-	if (chan)
-		return chan;
-	dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n",
-		__func__, __LINE__);
-	return NULL;
-}
-
-void mic_sysfs_init(struct mic_device *mdev);
-int mic_start(struct mic_device *mdev, const char *buf);
-void mic_stop(struct mic_device *mdev, bool force);
-void mic_shutdown(struct mic_device *mdev);
-void mic_reset_delayed_work(struct work_struct *work);
-void mic_reset_trigger_work(struct work_struct *work);
-void mic_shutdown_work(struct work_struct *work);
 void mic_bootparam_init(struct mic_device *mdev);
-void mic_set_state(struct mic_device *mdev, u8 state);
-void mic_set_shutdown_status(struct mic_device *mdev, u8 status);
 void mic_create_debug_dir(struct mic_device *dev);
 void mic_delete_debug_dir(struct mic_device *dev);
 void __init mic_init_debugfs(void);
 void mic_exit_debugfs(void);
-void mic_prepare_suspend(struct mic_device *mdev);
-void mic_complete_resume(struct mic_device *mdev);
-void mic_suspend(struct mic_device *mdev);
 #endif
diff --git a/kernel/drivers/misc/mic/host/mic_fops.c b/kernel/drivers/misc/mic/host/mic_fops.c
index 85776d732..8cc1d90cd 100644
--- a/kernel/drivers/misc/mic/host/mic_fops.c
+++ b/kernel/drivers/misc/mic/host/mic_fops.c
@@ -30,8 +30,8 @@
 int mic_open(struct inode *inode, struct file *f)
 {
 	struct mic_vdev *mvdev;
-	struct mic_device *mdev = container_of(inode->i_cdev,
-		struct mic_device, cdev);
+	struct mic_device *mdev = container_of(f->private_data,
+		struct mic_device, miscdev);
 
 	mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
 	if (!mvdev)
diff --git a/kernel/drivers/misc/mic/host/mic_intr.c b/kernel/drivers/misc/mic/host/mic_intr.c
index b4ca6c884..08ca3e372 100644
--- a/kernel/drivers/misc/mic/host/mic_intr.c
+++ b/kernel/drivers/misc/mic/host/mic_intr.c
@@ -30,8 +30,7 @@ static irqreturn_t mic_thread_fn(int irq, void *dev)
 	struct mic_intr_info *intr_info = mdev->intr_info;
 	struct mic_irq_info *irq_info = &mdev->irq_info;
 	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-					    struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 	int i;
 
 	spin_lock(&irq_info->mic_thread_lock);
@@ -57,8 +56,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
 	struct mic_intr_info *intr_info = mdev->intr_info;
 	struct mic_irq_info *irq_info = &mdev->irq_info;
 	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-					    struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 	u32 mask;
 	int i;
 
@@ -83,7 +81,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
 
 /* Return the interrupt offset from the index. Index is 0 based. */
 static u16 mic_map_src_to_offset(struct mic_device *mdev,
-		int intr_src, enum mic_intr_type type)
+				 int intr_src, enum mic_intr_type type)
 {
 	if (type >= MIC_NUM_INTR_TYPES)
 		return MIC_NUM_OFFSETS;
@@ -214,7 +212,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
 		mdev->irq_info.msix_entries[i].entry = i;
 
 	rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
-		MIC_MIN_MSIX);
+				   MIC_MIN_MSIX);
 	if (rc) {
 		dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
 		goto err_enable_msix;
@@ -229,7 +227,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
 		goto err_nomem2;
 	}
 
-	dev_dbg(mdev->sdev->parent,
+	dev_dbg(&mdev->pdev->dev,
 		"%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
 	return 0;
 err_nomem2:
@@ -281,7 +279,6 @@ static void mic_release_callbacks(struct mic_device *mdev)
 	spin_lock(&mdev->irq_info.mic_thread_lock);
 	spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
 	for (i = 0; i < MIC_NUM_OFFSETS; i++) {
-
 		if (list_empty(&mdev->irq_info.cb_list[i]))
 			break;
 
@@ -443,12 +440,11 @@ mic_request_threaded_irq(struct mic_device *mdev,
 	unsigned long cookie = 0;
 	u16 entry;
 	struct mic_intr_cb *intr_cb;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 
 	offset = mic_map_src_to_offset(mdev, intr_src, type);
 	if (offset >= MIC_NUM_OFFSETS) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"Error mapping index %d to a valid source id.\n",
 			intr_src);
 		rc = -EINVAL;
@@ -458,7 +454,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
 	if (mdev->irq_info.num_vectors > 1) {
 		msix = mic_get_available_vector(mdev);
 		if (!msix) {
-			dev_err(mdev->sdev->parent,
+			dev_err(&mdev->pdev->dev,
 				"No MSIx vectors available for use.\n");
 			rc = -ENOSPC;
 			goto err;
@@ -467,7 +463,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
 		rc = request_threaded_irq(msix->vector, handler, thread_fn,
 					  0, name, data);
 		if (rc) {
-			dev_dbg(mdev->sdev->parent,
+			dev_dbg(&mdev->pdev->dev,
 				"request irq failed rc = %d\n", rc);
 			goto err;
 		}
@@ -476,13 +472,13 @@ mic_request_threaded_irq(struct mic_device *mdev,
 		mdev->intr_ops->program_msi_to_src_map(mdev,
 				entry, offset, true);
 		cookie = MK_COOKIE(entry, offset);
-		dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n",
+		dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
 			msix->vector, intr_src);
 	} else {
 		intr_cb = mic_register_intr_callback(mdev, offset, handler,
 						     thread_fn, data);
 		if (IS_ERR(intr_cb)) {
-			dev_err(mdev->sdev->parent,
+			dev_err(&mdev->pdev->dev,
 				"No available callback entries for use\n");
 			rc = PTR_ERR(intr_cb);
 			goto err;
@@ -495,7 +491,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
 				entry, offset, true);
 		}
 		cookie = MK_COOKIE(entry, intr_cb->cb_id);
-		dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n",
+		dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
 			intr_cb->cb_id, intr_src);
 	}
 	return (struct mic_irq *)cookie;
@@ -515,20 +511,19 @@ err:
  * returns: none.
  */
 void mic_free_irq(struct mic_device *mdev,
-	struct mic_irq *cookie, void *data)
+		  struct mic_irq *cookie, void *data)
 {
 	u32 offset;
 	u32 entry;
 	u8 src_id;
 	unsigned int irq;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 
 	entry = GET_ENTRY((unsigned long)cookie);
 	offset = GET_OFFSET((unsigned long)cookie);
 	if (mdev->irq_info.num_vectors > 1) {
 		if (entry >= mdev->irq_info.num_vectors) {
-			dev_warn(mdev->sdev->parent,
+			dev_warn(&mdev->pdev->dev,
 				 "entry %d should be < num_irq %d\n",
 				entry, mdev->irq_info.num_vectors);
 			return;
@@ -539,12 +534,12 @@ void mic_free_irq(struct mic_device *mdev,
 		mdev->intr_ops->program_msi_to_src_map(mdev,
 			entry, offset, false);
 
-		dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq);
+		dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
 	} else {
 		irq = pdev->irq;
 		src_id = mic_unregister_intr_callback(mdev, offset);
 		if (src_id >= MIC_NUM_OFFSETS) {
-			dev_warn(mdev->sdev->parent, "Error unregistering callback\n");
+			dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
 			return;
 		}
 		if (pci_dev_msi_enabled(pdev)) {
@@ -552,7 +547,7 @@ void mic_free_irq(struct mic_device *mdev,
 			mdev->intr_ops->program_msi_to_src_map(mdev,
 				entry, src_id, false);
 		}
-		dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n",
+		dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
 			offset, src_id);
 	}
 }
@@ -579,7 +574,7 @@ int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
 
 	rc = mic_setup_intx(mdev, pdev);
 	if (rc) {
-		dev_err(mdev->sdev->parent, "no usable interrupts\n");
+		dev_err(&mdev->pdev->dev, "no usable interrupts\n");
 		return rc;
 	}
 done:
@@ -635,8 +630,7 @@ void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
 void mic_intr_restore(struct mic_device *mdev)
 {
 	int entry, offset;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 
 	if (!pci_dev_msi_enabled(pdev))
 		return;
diff --git a/kernel/drivers/misc/mic/host/mic_intr.h b/kernel/drivers/misc/mic/host/mic_intr.h
index 9f783d4ad..cce28824d 100644
--- a/kernel/drivers/misc/mic/host/mic_intr.h
+++ b/kernel/drivers/misc/mic/host/mic_intr.h
@@ -28,8 +28,9 @@
  * 3 for virtio network, console and block devices.
  * 1 for card shutdown notifications.
  * 4 for host owned DMA channels.
+ * 1 for SCIF.
  */
-#define MIC_MIN_MSIX 8
+#define MIC_MIN_MSIX 9
 #define MIC_NUM_OFFSETS 32
 
 /**
diff --git a/kernel/drivers/misc/mic/host/mic_main.c b/kernel/drivers/misc/mic/host/mic_main.c
index ab37a3117..153894e7e 100644
--- a/kernel/drivers/misc/mic/host/mic_main.c
+++ b/kernel/drivers/misc/mic/host/mic_main.c
@@ -16,17 +16,11 @@
  * the file called "COPYING".
  *
  * Intel MIC Host driver.
- *
- * Global TODO's across the driver to be added after initial base
- * patches are accepted upstream:
- * 1) Enable DMA support.
- * 2) Enable per vring interrupt support.
  */
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
-#include <linux/suspend.h>
 
 #include <linux/mic_common.h>
 #include "../common/mic_dev.h"
@@ -63,8 +57,6 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
 
 /* ID allocator for MIC devices */
 static struct ida g_mic_ida;
-/* Class of MIC devices for sysfs accessibility. */
-static struct class *g_mic_class;
 /* Base device node number for MIC devices */
 static dev_t g_mic_devno;
 
@@ -81,17 +73,14 @@ static const struct file_operations mic_fops = {
 static int mic_dp_init(struct mic_device *mdev)
 {
 	mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
-	if (!mdev->dp) {
-		dev_err(mdev->sdev->parent, "%s %d err %d\n",
-			__func__, __LINE__, -ENOMEM);
+	if (!mdev->dp)
 		return -ENOMEM;
-	}
 
 	mdev->dp_dma_addr = mic_map_single(mdev,
 		mdev->dp, MIC_DP_SIZE);
 	if (mic_map_error(mdev->dp_dma_addr)) {
 		kfree(mdev->dp);
-		dev_err(mdev->sdev->parent, "%s %d err %d\n",
+		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
 			__func__, __LINE__, -ENOMEM);
 		return -ENOMEM;
 	}
@@ -108,30 +97,6 @@ static void mic_dp_uninit(struct mic_device *mdev)
 }
 
 /**
- * mic_shutdown_db - Shutdown doorbell interrupt handler.
- */
-static irqreturn_t mic_shutdown_db(int irq, void *data)
-{
-	struct mic_device *mdev = data;
-	struct mic_bootparam *bootparam = mdev->dp;
-
-	mdev->ops->intr_workarounds(mdev);
-
-	switch (bootparam->shutdown_status) {
-	case MIC_HALTED:
-	case MIC_POWER_OFF:
-	case MIC_RESTART:
-		/* Fall through */
-	case MIC_CRASHED:
-		schedule_work(&mdev->shutdown_work);
-		break;
-	default:
-		break;
-	};
-	return IRQ_HANDLED;
-}
-
-/**
  * mic_ops_init: Initialize HW specific operation tables.
  *
  * @mdev: pointer to mic_device instance
@@ -188,43 +153,6 @@ static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
 }
 
 /**
-* mic_pm_notifier: Notifier callback function that handles
-* PM notifications.
-*
-* @notifier_block: The notifier structure.
-* @pm_event: The event for which the driver was notified.
-* @unused: Meaningless. Always NULL.
-*
-* returns NOTIFY_DONE
-*/
-static int mic_pm_notifier(struct notifier_block *notifier,
-		unsigned long pm_event, void *unused)
-{
-	struct mic_device *mdev = container_of(notifier,
-		struct mic_device, pm_notifier);
-
-	switch (pm_event) {
-	case PM_HIBERNATION_PREPARE:
-		/* Fall through */
-	case PM_SUSPEND_PREPARE:
-		mic_prepare_suspend(mdev);
-		break;
-	case PM_POST_HIBERNATION:
-		/* Fall through */
-	case PM_POST_SUSPEND:
-		/* Fall through */
-	case PM_POST_RESTORE:
-		mic_complete_resume(mdev);
-		break;
-	case PM_RESTORE_PREPARE:
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-/**
  * mic_device_init - Allocates and initializes the MIC device structure
  *
  * @mdev: pointer to mic_device instance
@@ -232,52 +160,16 @@ static int mic_pm_notifier(struct notifier_block *notifier,
  *
  * returns none.
  */
-static int
+static void
 mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
 {
-	int rc;
-
+	mdev->pdev = pdev;
 	mdev->family = mic_get_family(pdev);
 	mdev->stepping = pdev->revision;
 	mic_ops_init(mdev);
-	mic_sysfs_init(mdev);
 	mutex_init(&mdev->mic_mutex);
 	mdev->irq_info.next_avail_src = 0;
-	INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work);
-	INIT_WORK(&mdev->shutdown_work, mic_shutdown_work);
-	init_completion(&mdev->reset_wait);
 	INIT_LIST_HEAD(&mdev->vdev_list);
-	mdev->pm_notifier.notifier_call = mic_pm_notifier;
-	rc = register_pm_notifier(&mdev->pm_notifier);
-	if (rc) {
-		dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n",
-			rc);
-		goto register_pm_notifier_fail;
-	}
-	return 0;
-register_pm_notifier_fail:
-	flush_work(&mdev->shutdown_work);
-	flush_work(&mdev->reset_trigger_work);
-	return rc;
-}
-
-/**
- * mic_device_uninit - Frees resources allocated during mic_device_init(..)
- *
- * @mdev: pointer to mic_device instance
- *
- * returns none
- */
-static void mic_device_uninit(struct mic_device *mdev)
-{
-	/* The cmdline sysfs entry might have allocated cmdline */
-	kfree(mdev->cmdline);
-	kfree(mdev->firmware);
-	kfree(mdev->ramdisk);
-	kfree(mdev->bootmode);
-	flush_work(&mdev->reset_trigger_work);
-	flush_work(&mdev->shutdown_work);
-	unregister_pm_notifier(&mdev->pm_notifier);
 }
 
 /**
@@ -289,7 +181,7 @@ static void mic_device_uninit(struct mic_device *mdev)
  * returns 0 on success, < 0 on failure.
  */
 static int mic_probe(struct pci_dev *pdev,
-		const struct pci_device_id *ent)
+		     const struct pci_device_id *ent)
 {
 	int rc;
 	struct mic_device *mdev;
@@ -307,16 +199,12 @@ static int mic_probe(struct pci_dev *pdev,
 		goto ida_fail;
 	}
 
-	rc = mic_device_init(mdev, pdev);
-	if (rc) {
-		dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc);
-		goto device_init_fail;
-	}
+	mic_device_init(mdev, pdev);
 
 	rc = pci_enable_device(pdev);
 	if (rc) {
 		dev_err(&pdev->dev, "failed to enable pci device.\n");
-		goto uninit_device;
+		goto ida_remove;
 	}
 
 	pci_set_master(pdev);
@@ -365,61 +253,39 @@ static int mic_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, mdev);
 
-	mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev,
-		MKDEV(MAJOR(g_mic_devno), mdev->id), NULL,
-		mdev->attr_group, "mic%d", mdev->id);
-	if (IS_ERR(mdev->sdev)) {
-		rc = PTR_ERR(mdev->sdev);
-		dev_err(&pdev->dev,
-			"device_create_with_groups failed rc %d\n", rc);
-		goto smpt_uninit;
-	}
-	mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state");
-	if (!mdev->state_sysfs) {
-		rc = -ENODEV;
-		dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
-		goto destroy_device;
-	}
-
 	rc = mic_dp_init(mdev);
 	if (rc) {
 		dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
-		goto sysfs_put;
-	}
-	mutex_lock(&mdev->mic_mutex);
-
-	mdev->shutdown_db = mic_next_db(mdev);
-	mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db,
-					NULL, "shutdown-interrupt", mdev,
-					mdev->shutdown_db, MIC_INTR_DB);
-	if (IS_ERR(mdev->shutdown_cookie)) {
-		rc = PTR_ERR(mdev->shutdown_cookie);
-		mutex_unlock(&mdev->mic_mutex);
-		goto dp_uninit;
+		goto smpt_uninit;
 	}
-	mutex_unlock(&mdev->mic_mutex);
 	mic_bootparam_init(mdev);
 
 	mic_create_debug_dir(mdev);
-	cdev_init(&mdev->cdev, &mic_fops);
-	mdev->cdev.owner = THIS_MODULE;
-	rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1);
+
+	mdev->miscdev.minor = MISC_DYNAMIC_MINOR;
+	snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id);
+	mdev->miscdev.name = mdev->name;
+	mdev->miscdev.fops = &mic_fops;
+	mdev->miscdev.parent = &mdev->pdev->dev;
+	rc = misc_register(&mdev->miscdev);
 	if (rc) {
-		dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc);
+		dev_err(&pdev->dev, "misc_register err id %d rc %d\n",
+			mdev->id, rc);
 		goto cleanup_debug_dir;
 	}
+
+	mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
+	if (IS_ERR(mdev->cosm_dev)) {
+		rc = PTR_ERR(mdev->cosm_dev);
+		dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
+		goto misc_dereg;
+	}
 	return 0;
+misc_dereg:
+	misc_deregister(&mdev->miscdev);
 cleanup_debug_dir:
 	mic_delete_debug_dir(mdev);
-	mutex_lock(&mdev->mic_mutex);
-	mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
-	mutex_unlock(&mdev->mic_mutex);
-dp_uninit:
 	mic_dp_uninit(mdev);
-sysfs_put:
-	sysfs_put(mdev->state_sysfs);
-destroy_device:
-	device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
 smpt_uninit:
 	mic_smpt_uninit(mdev);
 free_interrupts:
@@ -432,9 +298,7 @@ release_regions:
 	pci_release_regions(pdev);
 disable_device:
 	pci_disable_device(pdev);
-uninit_device:
-	mic_device_uninit(mdev);
-device_init_fail:
+ida_remove:
 	ida_simple_remove(&g_mic_ida, mdev->id);
 ida_fail:
 	kfree(mdev);
@@ -458,26 +322,20 @@ static void mic_remove(struct pci_dev *pdev)
 	if (!mdev)
 		return;
 
-	mic_stop(mdev, false);
-	cdev_del(&mdev->cdev);
+	cosm_unregister_device(mdev->cosm_dev);
+	misc_deregister(&mdev->miscdev);
 	mic_delete_debug_dir(mdev);
-	mutex_lock(&mdev->mic_mutex);
-	mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
-	mutex_unlock(&mdev->mic_mutex);
-	flush_work(&mdev->shutdown_work);
 	mic_dp_uninit(mdev);
-	sysfs_put(mdev->state_sysfs);
-	device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
 	mic_smpt_uninit(mdev);
 	mic_free_interrupts(mdev, pdev);
-	iounmap(mdev->mmio.va);
 	iounmap(mdev->aper.va);
-	mic_device_uninit(mdev);
+	iounmap(mdev->mmio.va);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 	ida_simple_remove(&g_mic_ida, mdev->id);
 	kfree(mdev);
 }
+
 static struct pci_driver mic_driver = {
 	.name = mic_driver_name,
 	.id_table = mic_pci_tbl,
@@ -490,31 +348,23 @@ static int __init mic_init(void)
 	int ret;
 
 	ret = alloc_chrdev_region(&g_mic_devno, 0,
-		MIC_MAX_NUM_DEVS, mic_driver_name);
+				  MIC_MAX_NUM_DEVS, mic_driver_name);
 	if (ret) {
 		pr_err("alloc_chrdev_region failed ret %d\n", ret);
 		goto error;
 	}
 
-	g_mic_class = class_create(THIS_MODULE, mic_driver_name);
-	if (IS_ERR(g_mic_class)) {
-		ret = PTR_ERR(g_mic_class);
-		pr_err("class_create failed ret %d\n", ret);
-		goto cleanup_chrdev;
-	}
-
 	mic_init_debugfs();
 	ida_init(&g_mic_ida);
 	ret = pci_register_driver(&mic_driver);
 	if (ret) {
 		pr_err("pci_register_driver failed ret %d\n", ret);
-		goto cleanup_debugfs;
+		goto cleanup_chrdev;
 	}
 	return ret;
-cleanup_debugfs:
-	mic_exit_debugfs();
-	class_destroy(g_mic_class);
 cleanup_chrdev:
+	ida_destroy(&g_mic_ida);
+	mic_exit_debugfs();
 	unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
 error:
 	return ret;
@@ -525,7 +375,6 @@ static void __exit mic_exit(void)
 	pci_unregister_driver(&mic_driver);
 	ida_destroy(&g_mic_ida);
 	mic_exit_debugfs();
-	class_destroy(g_mic_class);
 	unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
 }
 
diff --git a/kernel/drivers/misc/mic/host/mic_smpt.c b/kernel/drivers/misc/mic/host/mic_smpt.c
index fae474c48..c3f958580 100644
--- a/kernel/drivers/misc/mic/host/mic_smpt.c
+++ b/kernel/drivers/misc/mic/host/mic_smpt.c
@@ -76,7 +76,7 @@ mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
 
 /* Populate an SMPT entry and update the reference counts. */
 static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
-		int entries, struct mic_device *mdev)
+			       int entries, struct mic_device *mdev)
 {
 	struct mic_smpt_info *smpt_info = mdev->smpt;
 	int i;
@@ -97,7 +97,7 @@ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
  * for a given DMA address and size.
  */
 static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
-				int entries, s64 *ref, size_t size)
+			      int entries, s64 *ref, size_t size)
 {
 	int spt;
 	int ae = 0;
@@ -148,7 +148,7 @@ found:
  * and the starting smpt address
  */
 static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
-				size_t size, s64 *ref,  u64 *smpt_start)
+				  size_t size, s64 *ref,  u64 *smpt_start)
 {
 	u64 start =  dma_addr;
 	u64 end = dma_addr + size;
@@ -174,15 +174,14 @@ static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
  *
  * returns a DMA address.
  */
-static dma_addr_t
-mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
 {
 	struct mic_smpt_info *smpt_info = mdev->smpt;
 	int spt;
 	dma_addr_t dma_addr;
 
 	if (!mic_is_system_addr(mdev, mic_addr)) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
 		return -EINVAL;
 	}
@@ -214,12 +213,12 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
 	if (!size || size > mic_max_system_memory(mdev))
 		return mic_addr;
 
-	ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL);
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
 	if (!ref)
 		return mic_addr;
 
 	num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
-		ref, &smpt_start);
+					     ref, &smpt_start);
 
 	/* Set the smpt table appropriately and get 16G aligned mic address */
 	mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
@@ -232,7 +231,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
 	 * else generate mic_addr by adding the 16G offset in dma_addr
 	 */
 	if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"mic_map failed dma_addr 0x%llx size 0x%lx\n",
 			dma_addr, size);
 		return mic_addr;
@@ -265,13 +264,13 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
 		return;
 
 	if (!mic_is_system_addr(mdev, mic_addr)) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"invalid address: 0x%llx\n", mic_addr);
 		return;
 	}
 
 	spt = mic_sys_addr_to_smpt(mdev, mic_addr);
-	ref = kmalloc(mdev->smpt->info.num_reg * sizeof(s64), GFP_KERNEL);
+	ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC);
 	if (!ref)
 		return;
 
@@ -285,7 +284,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
 	for (i = spt; i < spt + num_smpt; i++) {
 		smpt_info->entry[i].ref_count -= ref[i - spt];
 		if (smpt_info->entry[i].ref_count < 0)
-			dev_warn(mdev->sdev->parent,
+			dev_warn(&mdev->pdev->dev,
 				 "ref count for entry %d is negative\n", i);
 	}
 	spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
@@ -308,15 +307,14 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
 dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
 {
 	dma_addr_t mic_addr = 0;
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 	dma_addr_t dma_addr =
 		pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
 
 	if (!pci_dma_mapping_error(pdev, dma_addr)) {
 		mic_addr = mic_map(mdev, dma_addr, size);
 		if (!mic_addr) {
-			dev_err(mdev->sdev->parent,
+			dev_err(&mdev->pdev->dev,
 				"mic_map failed dma_addr 0x%llx size 0x%lx\n",
 				dma_addr, size);
 			pci_unmap_single(pdev, dma_addr,
@@ -340,8 +338,7 @@ dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
 void
 mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
 {
-	struct pci_dev *pdev = container_of(mdev->sdev->parent,
-		struct pci_dev, dev);
+	struct pci_dev *pdev = mdev->pdev;
 	dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
 	mic_unmap(mdev, mic_addr, size);
 	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
@@ -400,18 +397,18 @@ void mic_smpt_uninit(struct mic_device *mdev)
 	struct mic_smpt_info *smpt_info = mdev->smpt;
 	int i;
 
-	dev_dbg(mdev->sdev->parent,
+	dev_dbg(&mdev->pdev->dev,
 		"nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
 		mdev->id, smpt_info->ref_count,
 		smpt_info->map_count, smpt_info->unmap_count);
 
 	for (i = 0; i < smpt_info->info.num_reg; i++) {
-		dev_dbg(mdev->sdev->parent,
+		dev_dbg(&mdev->pdev->dev,
 			"SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
 			i, smpt_info->entry[i].dma_addr,
 			smpt_info->entry[i].ref_count);
 		if (smpt_info->entry[i].ref_count)
-			dev_warn(mdev->sdev->parent,
+			dev_warn(&mdev->pdev->dev,
 				 "ref count for entry %d is not zero\n", i);
 	}
 	kfree(smpt_info->entry);
diff --git a/kernel/drivers/misc/mic/host/mic_smpt.h b/kernel/drivers/misc/mic/host/mic_smpt.h
index 51970abfe..68721c6e7 100644
--- a/kernel/drivers/misc/mic/host/mic_smpt.h
+++ b/kernel/drivers/misc/mic/host/mic_smpt.h
@@ -78,6 +78,7 @@ void mic_unmap_single(struct mic_device *mdev,
 dma_addr_t mic_map(struct mic_device *mdev,
 	dma_addr_t dma_addr, size_t size);
 void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size);
+dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr);
 
 /**
  * mic_map_error - Check a MIC address for errors.
diff --git a/kernel/drivers/misc/mic/host/mic_sysfs.c b/kernel/drivers/misc/mic/host/mic_sysfs.c
deleted file mode 100644
index 6dd864e4a..000000000
--- a/kernel/drivers/misc/mic/host/mic_sysfs.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC Host driver.
- *
- */
-#include <linux/pci.h>
-
-#include <linux/mic_common.h>
-#include "../common/mic_dev.h"
-#include "mic_device.h"
-
-/*
- * A state-to-string lookup table, for exposing a human readable state
- * via sysfs. Always keep in sync with enum mic_states
- */
-static const char * const mic_state_string[] = {
-	[MIC_OFFLINE] = "offline",
-	[MIC_ONLINE] = "online",
-	[MIC_SHUTTING_DOWN] = "shutting_down",
-	[MIC_RESET_FAILED] = "reset_failed",
-	[MIC_SUSPENDING] = "suspending",
-	[MIC_SUSPENDED] = "suspended",
-};
-
-/*
- * A shutdown-status-to-string lookup table, for exposing a human
- * readable state via sysfs. Always keep in sync with enum mic_shutdown_status
- */
-static const char * const mic_shutdown_status_string[] = {
-	[MIC_NOP] = "nop",
-	[MIC_CRASHED] = "crashed",
-	[MIC_HALTED] = "halted",
-	[MIC_POWER_OFF] = "poweroff",
-	[MIC_RESTART] = "restart",
-};
-
-void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status)
-{
-	dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n",
-		mic_shutdown_status_string[mdev->shutdown_status],
-		mic_shutdown_status_string[shutdown_status]);
-	mdev->shutdown_status = shutdown_status;
-}
-
-void mic_set_state(struct mic_device *mdev, u8 state)
-{
-	dev_dbg(mdev->sdev->parent, "State %s -> %s\n",
-		mic_state_string[mdev->state],
-		mic_state_string[state]);
-	mdev->state = state;
-	sysfs_notify_dirent(mdev->state_sysfs);
-}
-
-static ssize_t
-family_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	static const char x100[] = "x100";
-	static const char unknown[] = "Unknown";
-	const char *card = NULL;
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	switch (mdev->family) {
-	case MIC_FAMILY_X100:
-		card = x100;
-		break;
-	default:
-		card = unknown;
-		break;
-	}
-	return scnprintf(buf, PAGE_SIZE, "%s\n", card);
-}
-static DEVICE_ATTR_RO(family);
-
-static ssize_t
-stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	char *string = "??";
-
-	if (!mdev)
-		return -EINVAL;
-
-	switch (mdev->stepping) {
-	case MIC_A0_STEP:
-		string = "A0";
-		break;
-	case MIC_B0_STEP:
-		string = "B0";
-		break;
-	case MIC_B1_STEP:
-		string = "B1";
-		break;
-	case MIC_C0_STEP:
-		string = "C0";
-		break;
-	default:
-		break;
-	}
-	return scnprintf(buf, PAGE_SIZE, "%s\n", string);
-}
-static DEVICE_ATTR_RO(stepping);
-
-static ssize_t
-state_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev || mdev->state >= MIC_LAST)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n",
-		mic_state_string[mdev->state]);
-}
-
-static ssize_t
-state_store(struct device *dev, struct device_attribute *attr,
-	    const char *buf, size_t count)
-{
-	int rc = 0;
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	if (!mdev)
-		return -EINVAL;
-	if (sysfs_streq(buf, "boot")) {
-		rc = mic_start(mdev, buf);
-		if (rc) {
-			dev_err(mdev->sdev->parent,
-				"mic_boot failed rc %d\n", rc);
-			count = rc;
-		}
-		goto done;
-	}
-
-	if (sysfs_streq(buf, "reset")) {
-		schedule_work(&mdev->reset_trigger_work);
-		goto done;
-	}
-
-	if (sysfs_streq(buf, "shutdown")) {
-		mic_shutdown(mdev);
-		goto done;
-	}
-
-	if (sysfs_streq(buf, "suspend")) {
-		mic_suspend(mdev);
-		goto done;
-	}
-
-	count = -EINVAL;
-done:
-	return count;
-}
-static DEVICE_ATTR_RW(state);
-
-static ssize_t shutdown_status_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n",
-		mic_shutdown_status_string[mdev->shutdown_status]);
-}
-static DEVICE_ATTR_RO(shutdown_status);
-
-static ssize_t
-cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	char *cmdline;
-
-	if (!mdev)
-		return -EINVAL;
-
-	cmdline = mdev->cmdline;
-
-	if (cmdline)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
-	return 0;
-}
-
-static ssize_t
-cmdline_store(struct device *dev, struct device_attribute *attr,
-	      const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	mutex_lock(&mdev->mic_mutex);
-	kfree(mdev->cmdline);
-
-	mdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
-	if (!mdev->cmdline) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(mdev->cmdline, buf, count);
-
-	if (mdev->cmdline[count - 1] == '\n')
-		mdev->cmdline[count - 1] = '\0';
-	else
-		mdev->cmdline[count] = '\0';
-unlock:
-	mutex_unlock(&mdev->mic_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(cmdline);
-
-static ssize_t
-firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	char *firmware;
-
-	if (!mdev)
-		return -EINVAL;
-
-	firmware = mdev->firmware;
-
-	if (firmware)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
-	return 0;
-}
-
-static ssize_t
-firmware_store(struct device *dev, struct device_attribute *attr,
-	       const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	mutex_lock(&mdev->mic_mutex);
-	kfree(mdev->firmware);
-
-	mdev->firmware = kmalloc(count + 1, GFP_KERNEL);
-	if (!mdev->firmware) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-	strncpy(mdev->firmware, buf, count);
-
-	if (mdev->firmware[count - 1] == '\n')
-		mdev->firmware[count - 1] = '\0';
-	else
-		mdev->firmware[count] = '\0';
-unlock:
-	mutex_unlock(&mdev->mic_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(firmware);
-
-static ssize_t
-ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	char *ramdisk;
-
-	if (!mdev)
-		return -EINVAL;
-
-	ramdisk = mdev->ramdisk;
-
-	if (ramdisk)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
-	return 0;
-}
-
-static ssize_t
-ramdisk_store(struct device *dev, struct device_attribute *attr,
-	      const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	mutex_lock(&mdev->mic_mutex);
-	kfree(mdev->ramdisk);
-
-	mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
-	if (!mdev->ramdisk) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(mdev->ramdisk, buf, count);
-
-	if (mdev->ramdisk[count - 1] == '\n')
-		mdev->ramdisk[count - 1] = '\0';
-	else
-		mdev->ramdisk[count] = '\0';
-unlock:
-	mutex_unlock(&mdev->mic_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(ramdisk);
-
-static ssize_t
-bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	char *bootmode;
-
-	if (!mdev)
-		return -EINVAL;
-
-	bootmode = mdev->bootmode;
-
-	if (bootmode)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
-	return 0;
-}
-
-static ssize_t
-bootmode_store(struct device *dev, struct device_attribute *attr,
-	       const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf"))
-		return -EINVAL;
-
-	mutex_lock(&mdev->mic_mutex);
-	kfree(mdev->bootmode);
-
-	mdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
-	if (!mdev->bootmode) {
-		count = -ENOMEM;
-		goto unlock;
-	}
-
-	strncpy(mdev->bootmode, buf, count);
-
-	if (mdev->bootmode[count - 1] == '\n')
-		mdev->bootmode[count - 1] = '\0';
-	else
-		mdev->bootmode[count] = '\0';
-unlock:
-	mutex_unlock(&mdev->mic_mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(bootmode);
-
-static ssize_t
-log_buf_addr_show(struct device *dev, struct device_attribute *attr,
-		  char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr);
-}
-
-static ssize_t
-log_buf_addr_store(struct device *dev, struct device_attribute *attr,
-		   const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	int ret;
-	unsigned long addr;
-
-	if (!mdev)
-		return -EINVAL;
-
-	ret = kstrtoul(buf, 16, &addr);
-	if (ret)
-		goto exit;
-
-	mdev->log_buf_addr = (void *)addr;
-	ret = count;
-exit:
-	return ret;
-}
-static DEVICE_ATTR_RW(log_buf_addr);
-
-static ssize_t
-log_buf_len_show(struct device *dev, struct device_attribute *attr,
-		 char *buf)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-
-	if (!mdev)
-		return -EINVAL;
-
-	return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len);
-}
-
-static ssize_t
-log_buf_len_store(struct device *dev, struct device_attribute *attr,
-		  const char *buf, size_t count)
-{
-	struct mic_device *mdev = dev_get_drvdata(dev->parent);
-	int ret;
-	unsigned long addr;
-
-	if (!mdev)
-		return -EINVAL;
-
-	ret = kstrtoul(buf, 16, &addr);
-	if (ret)
-		goto exit;
-
-	mdev->log_buf_len = (int *)addr;
-	ret = count;
-exit:
-	return ret;
-}
-static DEVICE_ATTR_RW(log_buf_len);
-
-static struct attribute *mic_default_attrs[] = {
-	&dev_attr_family.attr,
-	&dev_attr_stepping.attr,
-	&dev_attr_state.attr,
-	&dev_attr_shutdown_status.attr,
-	&dev_attr_cmdline.attr,
-	&dev_attr_firmware.attr,
-	&dev_attr_ramdisk.attr,
-	&dev_attr_bootmode.attr,
-	&dev_attr_log_buf_addr.attr,
-	&dev_attr_log_buf_len.attr,
-
-	NULL
-};
-
-ATTRIBUTE_GROUPS(mic_default);
-
-void mic_sysfs_init(struct mic_device *mdev)
-{
-	mdev->attr_group = mic_default_groups;
-}
diff --git a/kernel/drivers/misc/mic/host/mic_virtio.c b/kernel/drivers/misc/mic/host/mic_virtio.c
index a020e4eb4..58b107a24 100644
--- a/kernel/drivers/misc/mic/host/mic_virtio.c
+++ b/kernel/drivers/misc/mic/host/mic_virtio.c
@@ -23,7 +23,6 @@
 #include <linux/uaccess.h>
 #include <linux/dmaengine.h>
 #include <linux/mic_common.h>
-
 #include "../common/mic_dev.h"
 #include "mic_device.h"
 #include "mic_smpt.h"
@@ -40,7 +39,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
 {
 	int err = 0;
 	struct dma_async_tx_descriptor *tx;
-	struct dma_chan *mic_ch = mdev->dma_ch;
+	struct dma_chan *mic_ch = mdev->dma_ch[0];
 
 	if (!mic_ch) {
 		err = -EBUSY;
@@ -62,7 +61,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
 	}
 error:
 	if (err)
-		dev_err(mdev->sdev->parent, "%s %d err %d\n",
+		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
 			__func__, __LINE__, err);
 	return err;
 }
@@ -80,7 +79,7 @@ static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf,
 	struct mic_device *mdev = mvdev->mdev;
 	void __iomem *dbuf = mdev->aper.va + daddr;
 	struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
-	size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align;
+	size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
 	size_t dma_offset;
 	size_t partlen;
 	int err;
@@ -129,7 +128,7 @@ static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf,
 	struct mic_device *mdev = mvdev->mdev;
 	void __iomem *dbuf = mdev->aper.va + daddr;
 	struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
-	size_t dma_alignment = 1 << mdev->dma_ch->device->copy_align;
+	size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
 	size_t partlen;
 	int err;
 
@@ -440,7 +439,7 @@ void mic_virtio_reset_devices(struct mic_device *mdev)
 	struct list_head *pos, *tmp;
 	struct mic_vdev *mvdev;
 
-	dev_dbg(mdev->sdev->parent, "%s\n",  __func__);
+	dev_dbg(&mdev->pdev->dev, "%s\n",  __func__);
 
 	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
 		mvdev = list_entry(pos, struct mic_vdev, list);
@@ -686,7 +685,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
 		mvr->head = USHRT_MAX;
 		mvr->mvdev = mvdev;
 		mvr->vrh.notify = mic_notify;
-		dev_dbg(mdev->sdev->parent,
+		dev_dbg(&mdev->pdev->dev,
 			"%s %d index %d va %p info %p vr_size 0x%x\n",
 			__func__, __LINE__, i, vr->va, vr->info, vr_size);
 		mvr->buf = (void *)__get_free_pages(GFP_KERNEL,
@@ -704,7 +703,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
 					       mvdev->virtio_db, MIC_INTR_DB);
 	if (IS_ERR(mvdev->virtio_cookie)) {
 		ret = PTR_ERR(mvdev->virtio_cookie);
-		dev_dbg(mdev->sdev->parent, "request irq failed\n");
+		dev_dbg(&mdev->pdev->dev, "request irq failed\n");
 		goto err;
 	}
 
@@ -720,7 +719,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
 	smp_wmb();
 	dd->type = type;
 
-	dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type);
+	dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);
 
 	db = bootparam->h2c_config_db;
 	if (db != -1)
@@ -755,7 +754,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
 	db = bootparam->h2c_config_db;
 	if (db == -1)
 		goto skip_hot_remove;
-	dev_dbg(mdev->sdev->parent,
+	dev_dbg(&mdev->pdev->dev,
 		"Requesting hot remove id %d\n", mvdev->virtio_id);
 	mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
 	mdev->ops->send_intr(mdev, db);
@@ -765,7 +764,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
 		if (ret)
 			break;
 	}
-	dev_dbg(mdev->sdev->parent,
+	dev_dbg(&mdev->pdev->dev,
 		"Device id %d config_change %d guest_ack %d retry %d\n",
 		mvdev->virtio_id, mvdev->dc->config_change,
 		mvdev->dc->guest_ack, retry);
@@ -794,7 +793,7 @@ skip_hot_remove:
 		tmp_mvdev = list_entry(pos, struct mic_vdev, list);
 		if (tmp_mvdev == mvdev) {
 			list_del(pos);
-			dev_dbg(mdev->sdev->parent,
+			dev_dbg(&mdev->pdev->dev,
 				"Removing virtio device id %d\n",
 				mvdev->virtio_id);
 			break;
diff --git a/kernel/drivers/misc/mic/host/mic_virtio.h b/kernel/drivers/misc/mic/host/mic_virtio.h
index d574efb85..a80631f27 100644
--- a/kernel/drivers/misc/mic/host/mic_virtio.h
+++ b/kernel/drivers/misc/mic/host/mic_virtio.h
@@ -124,7 +124,7 @@ void mic_bh_handler(struct work_struct *work);
 /* Helper API to obtain the MIC PCIe device */
 static inline struct device *mic_dev(struct mic_vdev *mvdev)
 {
-	return mvdev->mdev->sdev->parent;
+	return &mvdev->mdev->pdev->dev;
 }
 
 /* Helper API to check if a virtio device is initialized */
diff --git a/kernel/drivers/misc/mic/host/mic_x100.c b/kernel/drivers/misc/mic/host/mic_x100.c
index b7a21e11d..8118ac48c 100644
--- a/kernel/drivers/misc/mic/host/mic_x100.c
+++ b/kernel/drivers/misc/mic/host/mic_x100.c
@@ -43,7 +43,7 @@
 static void
 mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
 {
-	dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n",
+	dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
 		val, idx);
 	mic_mmio_write(&mdev->mmio, val,
 		       MIC_X100_SBOX_BASE_ADDRESS +
@@ -66,7 +66,7 @@ mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
 		MIC_X100_SBOX_BASE_ADDRESS +
 		MIC_X100_SBOX_SPAD0 + idx * 4);
 
-	dev_dbg(mdev->sdev->parent,
+	dev_dbg(&mdev->pdev->dev,
 		"Reading 0x%x from scratch pad index %d\n", val, idx);
 	return val;
 }
@@ -126,7 +126,7 @@ static void mic_x100_disable_interrupts(struct mic_device *mdev)
  * @mdev: pointer to mic_device instance
  */
 static void mic_x100_send_sbox_intr(struct mic_device *mdev,
-			int doorbell)
+				    int doorbell)
 {
 	struct mic_mw *mw = &mdev->mmio;
 	u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
@@ -147,7 +147,7 @@ static void mic_x100_send_sbox_intr(struct mic_device *mdev,
  * @mdev: pointer to mic_device instance
  */
 static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
-			int doorbell)
+				      int doorbell)
 {
 	int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
 	/* Ensure that the interrupt is ordered w.r.t. previous stores. */
@@ -167,8 +167,7 @@ static void mic_x100_send_intr(struct mic_device *mdev, int doorbell)
 	if (doorbell < MIC_X100_NUM_SBOX_IRQ) {
 		mic_x100_send_sbox_intr(mdev, doorbell);
 	} else {
-		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ +
-			MIC_X100_RDMASR_IRQ_BASE;
+		rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ;
 		mic_x100_send_rdmasr_intr(mdev, rdmasr_db);
 	}
 }
@@ -360,15 +359,14 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
 
 	boot_mem = mdev->aper.len >> 20;
 	buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
-	if (!buf) {
-		dev_err(mdev->sdev->parent,
-			"%s %d allocation failed\n", __func__, __LINE__);
+	if (!buf)
 		return -ENOMEM;
-	}
+
 	len += snprintf(buf, CMDLINE_SIZE - len,
 		" mem=%dM", boot_mem);
-	if (mdev->cmdline)
-		snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline);
+	if (mdev->cosm_dev->cmdline)
+		snprintf(buf + len, CMDLINE_SIZE - len, " %s",
+			 mdev->cosm_dev->cmdline);
 	memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
 	kfree(buf);
 	return 0;
@@ -387,12 +385,11 @@ mic_x100_load_ramdisk(struct mic_device *mdev)
 	int rc;
 	struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
 
-	rc = request_firmware(&fw,
-			mdev->ramdisk, mdev->sdev->parent);
+	rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
 	if (rc < 0) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"ramdisk request_firmware failed: %d %s\n",
-			rc, mdev->ramdisk);
+			rc, mdev->cosm_dev->ramdisk);
 		goto error;
 	}
 	/*
@@ -424,10 +421,10 @@ mic_x100_get_boot_addr(struct mic_device *mdev)
 
 	scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
 	boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
-	dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n",
+	dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
 		__func__, __LINE__, boot_addr);
 	if (boot_addr > (1 << 31)) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"incorrect bootaddr 0x%x\n",
 			boot_addr);
 		rc = -EINVAL;
@@ -455,37 +452,37 @@ mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
 	if (rc)
 		goto error;
 	/* load OS */
-	rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent);
+	rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
 	if (rc < 0) {
-		dev_err(mdev->sdev->parent,
+		dev_err(&mdev->pdev->dev,
 			"ramdisk request_firmware failed: %d %s\n",
-			rc, mdev->firmware);
+			rc, mdev->cosm_dev->firmware);
 		goto error;
 	}
 	if (mdev->bootaddr > mdev->aper.len - fw->size) {
 		rc = -EINVAL;
-		dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n",
+		dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
 			__func__, __LINE__, rc, mdev->bootaddr);
 		release_firmware(fw);
 		goto error;
 	}
 	memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
 	mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
-	if (!strcmp(mdev->bootmode, "elf"))
+	if (!strcmp(mdev->cosm_dev->bootmode, "flash"))
 		goto done;
 	/* load command line */
 	rc = mic_x100_load_command_line(mdev, fw);
 	if (rc) {
-		dev_err(mdev->sdev->parent, "%s %d rc %d\n",
+		dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
 			__func__, __LINE__, rc);
 		goto error;
 	}
 	release_firmware(fw);
 	/* load ramdisk */
-	if (mdev->ramdisk)
+	if (mdev->cosm_dev->ramdisk)
 		rc = mic_x100_load_ramdisk(mdev);
 error:
-	dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc);
+	dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc);
 done:
 	return rc;
 }
diff --git a/kernel/drivers/misc/mic/scif/Makefile b/kernel/drivers/misc/mic/scif/Makefile
new file mode 100644
index 000000000..29cfc3e51
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/Makefile
@@ -0,0 +1,20 @@
+#
+# Makefile - SCIF driver.
+# Copyright(c) 2014, Intel Corporation.
+#
+obj-$(CONFIG_SCIF) += scif.o
+scif-objs := scif_main.o
+scif-objs += scif_peer_bus.o
+scif-objs += scif_ports.o
+scif-objs += scif_debugfs.o
+scif-objs += scif_fd.o
+scif-objs += scif_api.o
+scif-objs += scif_epd.o
+scif-objs += scif_rb.o
+scif-objs += scif_nodeqp.o
+scif-objs += scif_nm.o
+scif-objs += scif_dma.o
+scif-objs += scif_fence.o
+scif-objs += scif_mmap.o
+scif-objs += scif_rma.o
+scif-objs += scif_rma_list.o
diff --git a/kernel/drivers/misc/mic/scif/scif_api.c b/kernel/drivers/misc/mic/scif/scif_api.c
new file mode 100644
index 000000000..ddc9e4b08
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_api.c
@@ -0,0 +1,1496 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/scif.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+static const char * const scif_ep_states[] = {	/* indexed by ep->state */
+	"Unbound",
+	"Bound",
+	"Listening",
+	"Connected",
+	"Connecting",
+	"Mapping",
+	"Closing",
+	"Close Listening",
+	"Disconnected",
+	"Zombie"};
+
+enum conn_async_state {	/* values kept in ep->conn_async_state */
+	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
+	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
+	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
+};
+
+/*
+ * File operations for the anonymous inode file associated with a SCIF
+ * endpoint, used in kernel mode SCIF poll. Kernel mode SCIF poll calls
+ * portions of the poll API in the kernel and these take in a struct file *.
+ * Since a struct file is not available to kernel mode SCIF, it uses an
+ * anonymous file for this purpose.
+ */
+const struct file_operations scif_anon_fops = {
+	.owner = THIS_MODULE,	/* no file operation callbacks are set */
+};
+
+scif_epd_t scif_open(void)	/* new SCIFEP_UNBOUND endpoint, or NULL on failure */
+{
+	struct scif_endpt *ep;
+	int err;
+
+	might_sleep();
+	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+	if (!ep)
+		goto err_ep_alloc;
+
+	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
+	if (!ep->qp_info.qp)
+		goto err_qp_alloc;
+
+	err = scif_anon_inode_getfile(ep);
+	if (err)
+		goto err_anon_inode;	/* err is dropped; the caller only sees NULL */
+
+	spin_lock_init(&ep->lock);
+	mutex_init(&ep->sendlock);
+	mutex_init(&ep->recvlock);
+
+	scif_rma_ep_init(ep);
+	ep->state = SCIFEP_UNBOUND;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI open: ep %p success\n", ep);
+	return ep;
+
+err_anon_inode:	/* unwind in reverse order of allocation */
+	kfree(ep->qp_info.qp);
+err_qp_alloc:
+	kfree(ep);
+err_ep_alloc:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(scif_open);
+
+/*
+ * scif_disconnect_ep - Disconnects @ep if it is on the connected list
+ * @ep: The end point to disconnect. Returns @ep, or NULL if it was not found.
+ */
+static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
+{
+	struct scifmsg msg;
+	struct scif_endpt *fep = NULL;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	int err;
+
+	/*
+	 * Wake up any threads blocked in send()/recv() before closing
+	 * out the connection. Grabbing and releasing the send/recv lock
+	 * will ensure that any blocked senders/receivers have exited for
+	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
+	 * close. Ring 3 endpoints are not affected since close will not
+	 * be called while there are IOCTLs executing.
+	 */
+	wake_up_interruptible(&ep->sendwq);
+	wake_up_interruptible(&ep->recvwq);
+	mutex_lock(&ep->sendlock);
+	mutex_unlock(&ep->sendlock);
+	mutex_lock(&ep->recvlock);
+	mutex_unlock(&ep->recvlock);
+
+	/* Remove from the connected list */
+	mutex_lock(&scif_info.connlock);
+	list_for_each_safe(pos, tmpq, &scif_info.connected) {
+		tmpep = list_entry(pos, struct scif_endpt, list);
+		if (tmpep == ep) {
+			list_del(pos);
+			fep = tmpep;
+			spin_lock(&ep->lock);	/* held until after the SCIF_DISCNCT send below */
+			break;
+		}
+	}
+
+	if (!fep) {
+		/*
+		 * The other side has completed the disconnect before
+		 * the end point can be removed from the list. Therefore
+		 * the ep lock is not locked, traverse the disconnected
+		 * list to find the endpoint and release the conn lock.
+		 */
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		return NULL;
+	}
+
+	init_completion(&ep->discon);
+	msg.uop = SCIF_DISCNCT;
+	msg.src = ep->port;
+	msg.dst = ep->peer;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->remote_ep;
+
+	err = scif_nodeqp_send(ep->remote_dev, &msg);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+
+	if (!err)
+		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
+		wait_for_completion_timeout(&ep->discon,
+					    SCIF_NODE_ALIVE_TIMEOUT);
+	return ep;	/* returned whether or not the send or the wait succeeded */
+}
+
+int scif_close(scif_epd_t epd)	/* always returns 0 */
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_endpt *tmpep;
+	struct list_head *pos, *tmpq;
+	enum scif_epd_state oldstate;
+	bool flush_conn;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
+		ep, scif_ep_states[ep->state]);
+	might_sleep();
+	spin_lock(&ep->lock);
+	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
+	spin_unlock(&ep->lock);
+
+	if (flush_conn)
+		flush_work(&scif_info.conn_work);
+
+	spin_lock(&ep->lock);
+	oldstate = ep->state;
+
+	ep->state = SCIFEP_CLOSING;
+
+	switch (oldstate) {	/* every case drops ep->lock before it breaks */
+	case SCIFEP_ZOMBIE:
+		dev_err(scif_info.mdev.this_device,
+			"SCIFAPI close: zombie state unexpected\n"); /* Fall through */
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		/* Remove from the disconnected list */
+		mutex_lock(&scif_info.connlock);
+		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep) {
+				list_del(pos);
+				break;
+			}
+		}
+		mutex_unlock(&scif_info.connlock);
+		break;
+	case SCIFEP_UNBOUND:
+	case SCIFEP_BOUND:
+	case SCIFEP_CONNECTING:
+		spin_unlock(&ep->lock);
+		break;
+	case SCIFEP_MAPPING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CLOSING:
+	{
+		spin_unlock(&ep->lock);
+		scif_unregister_all_windows(epd);
+		scif_disconnect_ep(ep);
+		break;
+	}
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+	{
+		struct scif_conreq *conreq;
+		struct scifmsg msg;
+		struct scif_endpt *aep;
+
+		spin_unlock(&ep->lock);
+		mutex_lock(&scif_info.eplock);
+
+		/* remove from listen list */
+		list_for_each_safe(pos, tmpq, &scif_info.listen) {
+			tmpep = list_entry(pos, struct scif_endpt, list);
+			if (tmpep == ep)
+				list_del(pos);
+		}
+		/* Remove any dangling accepts */
+		while (ep->acceptcnt) {
+			aep = list_first_entry(&ep->li_accept,
+					       struct scif_endpt, liacceptlist);
+			list_del(&aep->liacceptlist);
+			scif_put_port(aep->port.port);
+			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+				tmpep = list_entry(pos, struct scif_endpt,
+						   miacceptlist);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.eplock);	/* dropped while connlock is taken */
+			mutex_lock(&scif_info.connlock);
+			list_for_each_safe(pos, tmpq, &scif_info.connected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
+				tmpep = list_entry(pos,
+						   struct scif_endpt, list);
+				if (tmpep == aep) {
+					list_del(pos);
+					break;
+				}
+			}
+			mutex_unlock(&scif_info.connlock);
+			scif_teardown_ep(aep);
+			mutex_lock(&scif_info.eplock);
+			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
+			ep->acceptcnt--;
+		}
+
+		spin_lock(&ep->lock);
+		mutex_unlock(&scif_info.eplock);
+
+		/* Remove and reject any pending connection requests. */
+		while (ep->conreqcnt) {
+			conreq = list_first_entry(&ep->conlist,
+						  struct scif_conreq, list);
+			list_del(&conreq->list);
+
+			msg.uop = SCIF_CNCT_REJ;
+			msg.dst.node = conreq->msg.src.node;
+			msg.dst.port = conreq->msg.src.port;
+			msg.payload[0] = conreq->msg.payload[0];
+			msg.payload[1] = conreq->msg.payload[1];
+			/*
+			 * No Error Handling on purpose for scif_nodeqp_send().
+			 * If the remote node is lost we still want to free the
+			 * connection requests on the self node.
+			 */
+			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
+					 &msg);
+			ep->conreqcnt--;
+			kfree(conreq);
+		}
+
+		spin_unlock(&ep->lock);
+		/* If a kSCIF accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	}
+	scif_put_port(ep->port.port);	/* common teardown for every previous state */
+	scif_anon_inode_fput(ep);
+	scif_teardown_ep(ep);
+	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_close);
+
+/**
+ * __scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
+ *			accept new connections. Always returns 0.
+ * @epd: The end point returned from scif_open()
+ */
+int __scif_flush(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	switch (ep->state) {
+	case SCIFEP_LISTENING:
+	{
+		ep->state = SCIFEP_CLLISTEN;
+
+		/* If an accept is waiting wake it up */
+		wake_up_interruptible(&ep->conwq);
+		break;
+	}
+	default:	/* endpoints in any other state are left unchanged */
+		break;
+	}
+	return 0;
+}
+
+int scif_bind(scif_epd_t epd, u16 pn)	/* returns the bound port, or a negative errno */
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int ret = 0;
+	int tmp;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: ep %p %s requested port number %d\n",
+		ep, scif_ep_states[ep->state], pn);
+	if (pn) {
+		/*
+		 * Similar to IETF RFC 1700, SCIF ports below
+		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
+		 * processes or by processes executed by privileged users.
+		 */
+		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
+			ret = -EACCES;
+			goto scif_bind_admin_exit;
+		}
+	}
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_BOUND) {
+		ret = -EINVAL;
+		goto scif_bind_exit;
+	} else if (ep->state != SCIFEP_UNBOUND) {
+		ret = -EISCONN;
+		goto scif_bind_exit;
+	}
+
+	if (pn) {
+		tmp = scif_rsrv_port(pn);
+		if (tmp != pn) {	/* the requested port could not be reserved */
+			ret = -EINVAL;
+			goto scif_bind_exit;
+		}
+	} else {	/* pn == 0: ask for any free port */
+		pn = scif_get_new_port();
+		if (!pn) {
+			ret = -ENOSPC;
+			goto scif_bind_exit;
+		}
+	}
+
+	ep->state = SCIFEP_BOUND;
+	ep->port.node = scif_info.nodeid;
+	ep->port.port = pn;
+	ep->conn_async_state = ASYNC_CONN_IDLE;
+	ret = pn;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI bind: bound to port number %d\n", pn);
+scif_bind_exit:
+	spin_unlock(&ep->lock);
+scif_bind_admin_exit:	/* reached directly when ep->lock was never taken */
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_bind);
+
+int scif_listen(scif_epd_t epd, int backlog)	/* 0, -EINVAL or -EISCONN */
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+	case SCIFEP_CLLISTEN:
+	case SCIFEP_UNBOUND:
+	case SCIFEP_DISCONNECTED:
+		spin_unlock(&ep->lock);
+		return -EINVAL;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CONNECTED:
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		spin_unlock(&ep->lock);
+		return -EISCONN;
+	case SCIFEP_BOUND:	/* the only state from which listening can start */
+		break;
+	}
+
+	ep->state = SCIFEP_LISTENING;
+	ep->backlog = backlog;
+
+	ep->conreqcnt = 0;
+	ep->acceptcnt = 0;
+	INIT_LIST_HEAD(&ep->conlist);
+	init_waitqueue_head(&ep->conwq);
+	INIT_LIST_HEAD(&ep->li_accept);
+	spin_unlock(&ep->lock);
+
+	/*
+	 * Listen setup is complete, so delete the qp information that is not
+	 * needed on a listen before placing on the list of listening ep's
+	 */
+	scif_teardown_ep(ep);
+	ep->qp_info.qp = NULL;
+
+	mutex_lock(&scif_info.eplock);
+	list_add_tail(&ep->list, &scif_info.listen);
+	mutex_unlock(&scif_info.eplock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_listen);
+
+/*
+ ************************************************************************
+ * SCIF connection flow:
+ *
+ * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
+ *	connections via a SCIF_CNCT_REQ message
+ * 2) A SCIF endpoint can initiate a SCIF connection by calling
+ *	scif_connect(..) which calls scif_setup_qp_connect(..) which
+ *	allocates the local qp for the endpoint ring buffer and then sends
+ *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
+ *	a SCIF_CNCT_REJ message
+ * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
+ *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
+ *	message otherwise
+ * 4) A thread blocked waiting for incoming connections allocates its local
+ *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
+ *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
+ *	the node sends a SCIF_CNCT_REJ message
+ * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
+ *	connecting endpoint is woken up as part of handling
+ *	scif_cnctgnt_resp(..) following which it maps the remote endpoints'
+ *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
+ *	success or a SCIF_CNCT_GNTNACK message on failure and completes
+ *	the scif_connect(..) API
+ * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
+ *	in step 4 is woken up and completes the scif_accept(..) API
+ * 7) The SCIF connection is now established between the two SCIF endpoints.
+ */
+static int scif_conn_func(struct scif_endpt *ep)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct device *spdev;
+
+	err = scif_reserve_dma_chan(ep);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+	/* Initiate the first part of the endpoint QP setup */
+	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
+				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s err %d qp_offset 0x%llx\n",
+			__func__, err, ep->qp_info.qp_offset);
+		ep->state = SCIFEP_BOUND;
+		goto connect_error_simple;
+	}
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	/* Format connect message and send it */
+	msg.src = ep->port;
+	msg.dst = ep->conn_port;
+	msg.uop = SCIF_CNCT_REQ;
+	msg.payload[0] = (u64)ep;
+	msg.payload[1] = ep->qp_info.qp_offset;
+	err = _scif_nodeqp_send(ep->remote_dev, &msg);
+	if (err)
+		goto connect_error_dec;
+	scif_put_peer_dev(spdev);	/* the peer device reference is not held across the wait */
+	/*
+	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
+	 * SCIF_CNCT_REJ message.
+	 */
+	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err) {	/* 0 means the wait timed out with the state unchanged */
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d timeout\n", __func__, __LINE__);
+		ep->state = SCIFEP_BOUND;
+	}
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto cleanup_qp;
+	}
+	if (ep->state == SCIFEP_MAPPING) {
+		err = scif_setup_qp_connect_response(ep->remote_dev,
+						     ep->qp_info.qp,
+						     ep->qp_info.gnt_pld);
+		/*
+		 * If the resources to map the queue are not available then
+		 * we need to tell the other side to terminate the accept
+		 */
+		if (err) {
+			dev_err(&ep->remote_dev->sdev->dev,
+				"%s %d err %d\n", __func__, __LINE__, err);
+			msg.uop = SCIF_CNCT_GNTNACK;
+			msg.payload[0] = ep->remote_ep;
+			_scif_nodeqp_send(ep->remote_dev, &msg);
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+
+		msg.uop = SCIF_CNCT_GNTACK;
+		msg.payload[0] = ep->remote_ep;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		if (err) {
+			ep->state = SCIFEP_BOUND;
+			goto connect_error_dec;
+		}
+		ep->state = SCIFEP_CONNECTED;
+		mutex_lock(&scif_info.connlock);
+		list_add_tail(&ep->list, &scif_info.connected);
+		mutex_unlock(&scif_info.connlock);
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connected\n", ep);
+	} else if (ep->state == SCIFEP_BOUND) {
+		dev_dbg(&ep->remote_dev->sdev->dev,
+			"SCIFAPI connect: ep %p connection refused\n", ep);
+		err = -ECONNREFUSED;
+		goto connect_error_dec;
+	}
+	scif_put_peer_dev(spdev);
+	return err;	/* NOTE(review): if state was neither MAPPING nor BOUND this is the wait result */
+connect_error_dec:
+	scif_put_peer_dev(spdev);
+cleanup_qp:
+	scif_cleanup_ep_qp(ep);
+connect_error_simple:
+	return err;
+}
+
+/*
+ * scif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect
+ * Drains scif_info.nb_connect_list, calling scif_conn_func() per endpoint.
+ */
+void scif_conn_handler(struct work_struct *work)
+{
+	struct scif_endpt *ep;
+
+	do {
+		ep = NULL;
+		spin_lock(&scif_info.nb_connect_lock);
+		if (!list_empty(&scif_info.nb_connect_list)) {
+			ep = list_first_entry(&scif_info.nb_connect_list,
+					      struct scif_endpt, conn_list);
+			list_del(&ep->conn_list);
+		}
+		spin_unlock(&scif_info.nb_connect_lock);
+		if (ep) {	/* the connect itself runs outside nb_connect_lock */
+			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
+	} while (ep);
+}
+
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+	struct scif_dev *remote_dev;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
+		scif_ep_states[ep->state]);
+
+	if (!scif_dev || dst->node > scif_info.maxid)
+		return -ENODEV;
+
+	might_sleep();
+
+	remote_dev = &scif_dev[dst->node];
+	spdev = scif_get_peer_dev(remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		return err;
+	}
+
+	spin_lock(&ep->lock);
+	switch (ep->state) {
+	case SCIFEP_ZOMBIE:
+	case SCIFEP_CLOSING:
+		err = -EINVAL;
+		break;
+	case SCIFEP_DISCONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EINVAL;
+		break;
+	case SCIFEP_LISTENING:
+	case SCIFEP_CLLISTEN:
+		err = -EOPNOTSUPP;
+		break;
+	case SCIFEP_CONNECTING:
+	case SCIFEP_MAPPING:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			err = -EINPROGRESS;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_CONNECTED:
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		else
+			err = -EISCONN;
+		break;
+	case SCIFEP_UNBOUND:
+		ep->port.port = scif_get_new_port();
+		if (!ep->port.port) {
+			err = -ENOSPC;
+		} else {
+			ep->port.node = scif_info.nodeid;
+			ep->conn_async_state = ASYNC_CONN_IDLE;
+		}
+		/* Fall through */
+	case SCIFEP_BOUND:
+		/*
+		 * If a non-blocking connect has already been initiated
+		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
+		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
+		 * SCIFEP_BOUND due to an error in the connection process
+		 * (e.g., connection refused). If conn_async_state is
+		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
+		 * so that the error status can be collected. If the state is
+		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
+		 * -EINPROGRESS since some other thread is waiting to collect
+		 * the error status.
+		 */
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
+		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+			err = -EINPROGRESS;
+		} else {
+			ep->conn_port = *dst;
+			init_waitqueue_head(&ep->sendwq);
+			init_waitqueue_head(&ep->recvwq);
+			init_waitqueue_head(&ep->conwq);
+			ep->conn_async_state = 0;
+
+			if (unlikely(non_block))
+				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
+		}
+		break;
+	}
+
+	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
+			goto connect_simple_unlock1;
+
+	ep->state = SCIFEP_CONNECTING;
+	ep->remote_dev = &scif_dev[dst->node];
+	ep->qp_info.qp->magic = SCIFEP_MAGIC;
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		init_waitqueue_head(&ep->conn_pend_wq);
+		spin_lock(&scif_info.nb_connect_lock);
+		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
+		spin_unlock(&scif_info.nb_connect_lock);
+		err = -EINPROGRESS;	/* returned now; conn_work performs the connect */
+		schedule_work(&scif_info.conn_work);
+	}
+connect_simple_unlock1:
+	spin_unlock(&ep->lock);
+	scif_put_peer_dev(spdev);
+	if (err) {
+		return err;
+	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
+		flush_work(&scif_info.conn_work);	/* wait for the queued connect work */
+		err = ep->conn_err;
+		spin_lock(&ep->lock);
+		ep->conn_async_state = ASYNC_CONN_IDLE;
+		spin_unlock(&ep->lock);
+	} else {	/* blocking connect: run the handshake in the caller's context */
+		err = scif_conn_func(ep);
+	}
+	return err;
+}
+
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
+{
+	return __scif_connect(epd, dst, false);	/* false selects the blocking path */
+}
+EXPORT_SYMBOL_GPL(scif_connect);
+
+/**
+ * scif_accept() - Accept a connection request from the remote node
+ *
+ * The function accepts a connection request from the remote node.  Successful
+ * completion is indicated by a new end point being created and passed back
+ * to the caller for future reference.
+ *
+ * Upon successful completion a zero will be returned and the peer information
+ * will be filled in.
+ *
+ * If the end point is not in the listening state -EINVAL will be returned.
+ *
+ * If resource allocation fails during the connection sequence, -ENOMEM
+ * will be returned.
+ *
+ * If the function is called without SCIF_ACCEPT_SYNC set and no connection
+ * requests are pending it will return -EAGAIN.
+ *
+ * If the remote side is not sending any connection requests the caller may
+ * terminate this function with a signal.  If so a -EINTR will be returned.
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
+		scif_epd_t *newepd, int flags)
+{
+	struct scif_endpt *lep = (struct scif_endpt *)epd; /* listening ep */
+	struct scif_endpt *cep; /* ep created for the accepted connection */
+	struct scif_conreq *conreq;
+	struct scifmsg msg;
+	int err;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
+
+	if (flags & ~SCIF_ACCEPT_SYNC) /* only SCIF_ACCEPT_SYNC is accepted */
+		return -EINVAL;
+
+	if (!peer || !newepd)
+		return -EINVAL;
+
+	might_sleep();
+	spin_lock(&lep->lock);
+	if (lep->state != SCIFEP_LISTENING) {
+		spin_unlock(&lep->lock);
+		return -EINVAL;
+	}
+
+	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
+		/* No connection request present and we do not want to wait */
+		spin_unlock(&lep->lock);
+		return -EAGAIN;
+	}
+
+	lep->files = current->files;
+retry_connection:
+	spin_unlock(&lep->lock); /* every jump here holds lep->lock */
+	/*
+	 * Wait for the remote node to send us a SCIF_CNCT_REQ, or for the
+	 * end point to leave the listening state.
+	 */
+	err = wait_event_interruptible(lep->conwq,
+				       (lep->conreqcnt ||
+				       (lep->state != SCIFEP_LISTENING)));
+	if (err)
+		return err;
+
+	if (lep->state != SCIFEP_LISTENING) /* checked without lep->lock */
+		return -EINTR;
+
+	spin_lock(&lep->lock);
+
+	if (!lep->conreqcnt) /* count is zero again under the lock: wait */
+		goto retry_connection;
+
+	/* Get the first connect request off the list */
+	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
+	list_del(&conreq->list);
+	lep->conreqcnt--;
+	spin_unlock(&lep->lock);
+
+	/* Fill in the peer information from the source of the request */
+	peer->node = conreq->msg.src.node;
+	peer->port = conreq->msg.src.port;
+
+	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+	if (!cep) {
+		err = -ENOMEM;
+		goto scif_accept_error_epalloc;
+	}
+	spin_lock_init(&cep->lock);
+	mutex_init(&cep->sendlock);
+	mutex_init(&cep->recvlock);
+	cep->state = SCIFEP_CONNECTING;
+	cep->remote_dev = &scif_dev[peer->node];
+	cep->remote_ep = conreq->msg.payload[0];
+
+	scif_rma_ep_init(cep);
+
+	err = scif_reserve_dma_chan(cep);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto scif_accept_error_qpalloc;
+	}
+
+	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
+	if (!cep->qp_info.qp) {
+		err = -ENOMEM;
+		goto scif_accept_error_qpalloc;
+	}
+
+	err = scif_anon_inode_getfile(cep);
+	if (err)
+		goto scif_accept_error_anon_inode;
+
+	cep->qp_info.qp->magic = SCIFEP_MAGIC;
+	spdev = scif_get_peer_dev(cep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		goto scif_accept_error_map;
+	}
+	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
+				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
+				   cep->remote_dev);
+	if (err) {
+		dev_dbg(&cep->remote_dev->sdev->dev,
+			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
+			lep, cep, err, cep->qp_info.qp_offset);
+		scif_put_peer_dev(spdev);
+		goto scif_accept_error_map;
+	}
+
+	cep->port.node = lep->port.node;
+	cep->port.port = lep->port.port;
+	cep->peer.node = peer->node;
+	cep->peer.port = peer->port;
+	init_waitqueue_head(&cep->sendwq);
+	init_waitqueue_head(&cep->recvwq);
+	init_waitqueue_head(&cep->conwq);
+
+	msg.uop = SCIF_CNCT_GNT;
+	msg.src = cep->port;
+	msg.payload[0] = cep->remote_ep;
+	msg.payload[1] = cep->qp_info.qp_offset;
+	msg.payload[2] = (u64)cep;
+
+	err = _scif_nodeqp_send(cep->remote_dev, &msg);
+	scif_put_peer_dev(spdev); /* dropped whether or not the send worked */
+	if (err)
+		goto scif_accept_error_map;
+retry:
+	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
+	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
+				 SCIF_NODE_ACCEPT_TIMEOUT);
+	if (!err && scifdev_alive(cep)) /* timed out, peer alive: wait on */
+		goto retry;
+	err = !err ? -ENODEV : 0; /* timed out and the peer is not alive */
+	if (err)
+		goto scif_accept_error_map;
+	kfree(conreq);
+
+	spin_lock(&cep->lock);
+
+	if (cep->state == SCIFEP_CLOSING) {
+		/*
+		 * Remote failed to allocate resources and NAKed the grant.
+		 * There is at this point nothing referencing the new end point.
+		 */
+		spin_unlock(&cep->lock);
+		scif_teardown_ep(cep);
+		kfree(cep);
+
+		/* If called with the sync flag then go back and wait. */
+		if (flags & SCIF_ACCEPT_SYNC) {
+			spin_lock(&lep->lock); /* retry_connection unlocks it */
+			goto retry_connection;
+		}
+		return -EAGAIN;
+	}
+
+	scif_get_port(cep->port.port);
+	*newepd = (scif_epd_t)cep;
+	spin_unlock(&cep->lock);
+	return 0;
+scif_accept_error_map:
+	scif_anon_inode_fput(cep);
+scif_accept_error_anon_inode:
+	scif_teardown_ep(cep);
+scif_accept_error_qpalloc:
+	kfree(cep);
+scif_accept_error_epalloc:
+	msg.uop = SCIF_CNCT_REJ; /* all error paths reject the request */
+	msg.dst.node = conreq->msg.src.node;
+	msg.dst.port = conreq->msg.src.port;
+	msg.payload[0] = conreq->msg.payload[0];
+	msg.payload[1] = conreq->msg.payload[1];
+	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
+	kfree(conreq);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_accept);
+
+/*
+ * scif_msg_param_check:
+ * @epd: The end point returned from scif_open() (not inspected here)
+ * @len: Length of the transfer
+ * @flags: blocking or non blocking
+ *
+ * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
+ * Returns 0 when the parameters are acceptable and -EINVAL when @len is
+ * negative or when @flags is non-zero without SCIF_RECV_BLOCK set.
+ */
+static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
+{
+	/* A negative length can never be transferred */
+	if (len < 0)
+		return -EINVAL;
+
+	/* Any flags that are passed must include the blocking bit */
+	if (flags && !(flags & SCIF_RECV_BLOCK))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_xfer_len = 0, sent_len = 0, write_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_SEND_BLOCK)
+		might_sleep();
+
+	spin_lock(&ep->lock);
+	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
+		write_count = scif_rb_space(&qp->outbound_q);
+		if (write_count) {
+			/* Best effort to send as much data as possible */
+			curr_xfer_len = min(len - sent_len, write_count);
+			ret = scif_rb_write(&qp->outbound_q, msg,
+					    curr_xfer_len);
+			if (ret < 0)
+				break;
+			/* Success. Update write pointer */
+			scif_rb_commit(&qp->outbound_q);
+			/*
+			 * Send a notification to the peer about the
+			 * produced data message.
+			 */
+			notif_msg.src = ep->port;
+			notif_msg.uop = SCIF_CLIENT_SENT;
+			notif_msg.payload[0] = ep->remote_ep;
+			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
+			if (ret)
+				break;
+			sent_len += curr_xfer_len;
+			msg = msg + curr_xfer_len;
+			continue;
+		}
+		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
+		/* No RB space at all. Return now in the Non Blocking case */
+		if (!(flags & SCIF_SEND_BLOCK))
+			break;
+
+		spin_unlock(&ep->lock); /* the wait below runs unlocked */
+		/*
+		 * Blocking case: wait until the end point leaves the
+		 * connected state or the RB has room for curr_xfer_len bytes.
+		 */
+		ret =
+		wait_event_interruptible(ep->sendwq,
+					 (SCIFEP_CONNECTED != ep->state) ||
+					 (scif_rb_space(&qp->outbound_q) >=
+					 curr_xfer_len));
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (sent_len) /* bytes already sent take precedence over an error */
+		ret = sent_len;
+	else if (!ret && SCIFEP_CONNECTED != ep->state)
+		ret = SCIFEP_DISCONNECTED == ep->state ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	int read_size;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scifmsg notif_msg;
+	int curr_recv_len = 0, remaining_len = len, read_count;
+	int ret = 0;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (flags & SCIF_RECV_BLOCK)
+		might_sleep();
+	spin_lock(&ep->lock);
+	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
+				 SCIFEP_DISCONNECTED == ep->state)) {
+		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
+		if (read_count) {
+			/*
+			 * Best effort to recv as much data as there
+			 * are bytes to read in the RB particularly
+			 * important for the Non Blocking case.
+			 */
+			curr_recv_len = min(remaining_len, read_count);
+			read_size = scif_rb_get_next(&qp->inbound_q,
+						     msg, curr_recv_len);
+			if (ep->state == SCIFEP_CONNECTED) {
+				/*
+				 * Update the read pointer only if the endpoint
+				 * is still connected else the read pointer
+				 * might no longer exist since the peer has
+				 * freed resources!
+				 */
+				scif_rb_update_read_ptr(&qp->inbound_q);
+				/*
+				 * Send a notification to the peer about the
+				 * consumed data message only if the EP is in
+				 * SCIFEP_CONNECTED state.
+				 */
+				notif_msg.src = ep->port;
+				notif_msg.uop = SCIF_CLIENT_RCVD;
+				notif_msg.payload[0] = ep->remote_ep;
+				ret = _scif_nodeqp_send(ep->remote_dev,
+							&notif_msg);
+				if (ret)
+					break;
+			}
+			remaining_len -= curr_recv_len;
+			msg = msg + curr_recv_len;
+			continue;
+		}
+		/*
+		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
+		 * we will keep looping forever.
+		 */
+		if (ep->state == SCIFEP_DISCONNECTED)
+			break;
+		/*
+		 * Return in the Non Blocking case if there is no data
+		 * to read in this iteration.
+		 */
+		if (!(flags & SCIF_RECV_BLOCK))
+			break;
+		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
+		spin_unlock(&ep->lock); /* the wait below runs unlocked */
+		/*
+		 * Blocking case: wait until curr_recv_len bytes are available
+		 * in the RB (signalled by a SCIF_CLIENT_SENT message) or until
+		 * the end point leaves the connected state.
+		 */
+		ret =
+		wait_event_interruptible(ep->recvwq,
+					 SCIFEP_CONNECTED != ep->state ||
+					 scif_rb_count(&qp->inbound_q,
+						       curr_recv_len)
+					 >= curr_recv_len);
+		spin_lock(&ep->lock);
+		if (ret)
+			break;
+	}
+	if (len - remaining_len) /* bytes already read win over an error */
+		ret = len - remaining_len;
+	else if (!ret && ep->state != SCIFEP_CONNECTED)
+		ret = ep->state == SCIFEP_DISCONNECTED ?
+			-ECONNRESET : -ENOTCONN;
+	spin_unlock(&ep->lock);
+	return ret;
+}
+
+/**
+ * scif_user_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: User space address of the data to send
+ * @len: Length to send
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_send(). The user buffer is staged
+ * through a kernel bounce buffer one chunk at a time.
+ *
+ * Returns the number of bytes sent, or a negative errno when the last
+ * step that ran failed.
+ */
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+	int sent_len = 0;
+	int err = 0;
+	char *tmp;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		return err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->sendlock);
+	while (sent_len != len) {
+		int loop_len = min(chunk_len, len - sent_len);
+
+		if (copy_from_user(tmp, msg, loop_len)) {
+			err = -EFAULT;
+			break;
+		}
+		err = _scif_send(epd, tmp, loop_len, flags);
+		if (err < 0)
+			break;
+		sent_len += err;
+		msg += err;
+		/* A short send ends the transfer */
+		if (err != loop_len)
+			break;
+	}
+	mutex_unlock(&ep->sendlock);
+	kfree(tmp);
+
+	return err < 0 ? err : sent_len;
+}
+
+/**
+ * scif_user_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: User space address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the driver IOCTL entry point
+ * only and is a wrapper for _scif_recv(). Data is received into a
+ * kernel bounce buffer and copied out one chunk at a time.
+ *
+ * Returns the number of bytes received, or a negative errno when the
+ * last step that ran failed.
+ */
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
+	int recv_len = 0;
+	int err = 0;
+	char *tmp;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	err = scif_msg_param_check(epd, len, flags);
+	if (err)
+		return err;
+
+	tmp = kmalloc(chunk_len, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	/*
+	 * Grabbing the lock before breaking up the transfer in
+	 * multiple chunks is required to ensure that messages do
+	 * not get fragmented and reordered.
+	 */
+	mutex_lock(&ep->recvlock);
+	while (recv_len != len) {
+		int loop_len = min(chunk_len, len - recv_len);
+
+		err = _scif_recv(epd, tmp, loop_len, flags);
+		if (err < 0)
+			break;
+		/* Only the bytes actually received are copied out */
+		if (copy_to_user(msg, tmp, err)) {
+			err = -EFAULT;
+			break;
+		}
+		recv_len += err;
+		msg += err;
+		/* A short receive ends the transfer */
+		if (err != loop_len)
+			break;
+	}
+	mutex_unlock(&ep->recvlock);
+	kfree(tmp);
+
+	return err < 0 ? err : recv_len;
+}
+
+/**
+ * scif_send() - Send data to connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Kernel address of the data to send
+ * @len: Length to send
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_send().
+ *
+ * Returns 0 for a zero length request, -EINVAL for bad parameters,
+ * -ENOTCONN when the end point has no remote device, otherwise the
+ * result of _scif_send().
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int blocking = flags & SCIF_SEND_BLOCK;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+	if (!ep->remote_dev)
+		return -ENOTCONN;
+
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_send().
+	 */
+	if (blocking)
+		mutex_lock(&ep->sendlock);
+	ret = _scif_send(epd, msg, len, flags);
+	if (blocking)
+		mutex_unlock(&ep->sendlock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_send);
+
+/**
+ * scif_recv() - Receive data from connection queue
+ * @epd: The end point returned from scif_open()
+ * @msg: Kernel address to place data
+ * @len: Length to receive
+ * @flags: blocking or non blocking
+ *
+ * This function is called from the kernel mode only and is
+ * a wrapper for _scif_recv().
+ *
+ * Returns 0 for a zero length request, -EINVAL for bad parameters,
+ * otherwise the result of _scif_recv().
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int blocking = flags & SCIF_RECV_BLOCK;
+	int ret;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
+	if (!len)
+		return 0;
+
+	ret = scif_msg_param_check(epd, len, flags);
+	if (ret)
+		return ret;
+
+	/*
+	 * Grab the mutex lock in the blocking case only
+	 * to ensure messages do not get fragmented/reordered.
+	 * The non blocking mode is protected using spin locks
+	 * in _scif_recv().
+	 */
+	if (blocking)
+		mutex_lock(&ep->recvlock);
+	ret = _scif_recv(epd, msg, len, flags);
+	if (blocking)
+		mutex_unlock(&ep->recvlock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scif_recv);
+
+static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
+				   poll_table *p, struct scif_endpt *ep)
+{
+	/*
+	 * Called with ep->lock held; the lock is held again on return.
+	 *
+	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
+	 * and regrab it afterwards. Because the endpoint state might have
+	 * changed while the lock was given up, the state must be checked
+	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
+	 * does this.
+	 */
+	spin_unlock(&ep->lock);
+	poll_wait(f, wq, p);
+	spin_lock(&ep->lock);
+}
+
+unsigned int
+__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
+{
+	unsigned int mask = 0; /* POLL* bits reported back to the caller */
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
+
+	spin_lock(&ep->lock);
+
+	/*
+	 * Endpoint is waiting for a non-blocking connect to complete.
+	 * The state is tested again after _scif_poll_wait() because that
+	 * helper drops and retakes ep->lock.
+	 */
+	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
+		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
+			if (ep->state == SCIFEP_CONNECTED ||
+			    ep->state == SCIFEP_DISCONNECTED ||
+			    ep->conn_err)
+				mask |= POLLOUT;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is listening for incoming connection requests */
+	if (ep->state == SCIFEP_LISTENING) {
+		_scif_poll_wait(f, &ep->conwq, wait, ep);
+		if (ep->state == SCIFEP_LISTENING) {
+			if (ep->conreqcnt) /* a request is queued for accept */
+				mask |= POLLIN;
+			goto exit;
+		}
+	}
+
+	/* Endpoint is connected or disconnected */
+	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
+		if (poll_requested_events(wait) & POLLIN)
+			_scif_poll_wait(f, &ep->recvwq, wait, ep);
+		if (poll_requested_events(wait) & POLLOUT)
+			_scif_poll_wait(f, &ep->sendwq, wait, ep);
+		if (ep->state == SCIFEP_CONNECTED ||
+		    ep->state == SCIFEP_DISCONNECTED) {
+			/* Data can be read without blocking */
+			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
+				mask |= POLLIN;
+			/* Data can be written without blocking */
+			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
+				mask |= POLLOUT;
+			/* Return POLLHUP if endpoint is disconnected */
+			if (ep->state == SCIFEP_DISCONNECTED)
+				mask |= POLLHUP;
+			goto exit;
+		}
+	}
+
+	/*
+	 * Return POLLERR if the endpoint is in none of the above states,
+	 * including when its state changed while the lock was dropped.
+	 */
+	mask |= POLLERR;
+exit:
+	spin_unlock(&ep->lock);
+	return mask;
+}
+
+/**
+ * scif_poll() - Kernel mode SCIF poll
+ * @ufds: Array of scif_pollepd structures containing the end points
+ *	  and events to poll on
+ * @nfds: Size of the ufds array
+ * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
+ *
+ * The code flow in this function is based on do_poll(..) in select.c
+ *
+ * The timeout bounds the whole call: the jiffies left over after a
+ * wake up that produced no events are carried into the next sleep
+ * instead of starting the full timeout again.
+ *
+ * Returns the number of endpoints which have pending events or 0 in
+ * the event of a timeout. If a signal is used for wake up, -EINTR is
+ * returned.
+ */
+int
+scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
+{
+	struct poll_wqueues table;
+	poll_table *pt;
+	int i, mask, count = 0, timed_out = timeout_msecs == 0;
+	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
+		: msecs_to_jiffies(timeout_msecs);
+
+	poll_initwait(&table);
+	pt = &table.pt;
+	while (1) {
+		for (i = 0; i < nfds; i++) {
+			/* POLLERR and POLLHUP are always reported */
+			pt->_key = ufds[i].events | POLLERR | POLLHUP;
+			mask = __scif_pollfd(ufds[i].epd->anon,
+					     pt, ufds[i].epd);
+			mask &= ufds[i].events | POLLERR | POLLHUP;
+			if (mask) {
+				count++;
+				/* An event was found: stop queueing waiters */
+				pt->_qproc = NULL;
+			}
+			ufds[i].revents = mask;
+		}
+		/* Wait queues only need to be registered on the first pass */
+		pt->_qproc = NULL;
+		if (!count) {
+			count = table.error;
+			if (signal_pending(current))
+				count = -EINTR;
+		}
+		if (count || timed_out)
+			break;
+
+		/*
+		 * schedule_timeout_interruptible() returns the jiffies that
+		 * remain, so keep them for the next sleep.
+		 */
+		timeout = schedule_timeout_interruptible(timeout);
+		if (!timeout)
+			timed_out = 1;
+	}
+	poll_freewait(&table);
+	return count;
+}
+EXPORT_SYMBOL_GPL(scif_poll);
+
+/*
+ * scif_get_node_ids() - Report the online SCIF nodes
+ * @nodes: Array filled with the ids of the online nodes
+ * @len: Number of entries available in @nodes
+ * @self: Filled with the id of the local node
+ *
+ * At most min(@len, total node count) ids are stored in @nodes.
+ * Returns the number of online nodes, which may exceed the number of
+ * ids stored.
+ */
+int scif_get_node_ids(u16 *nodes, int len, u16 *self)
+{
+	int filled = 0;
+	int online = 0;
+	int id;
+
+	if (!scif_is_mgmt_node())
+		scif_get_node_info();
+
+	*self = scif_info.nodeid;
+	mutex_lock(&scif_info.conflock);
+	len = min_t(int, len, scif_info.total);
+	for (id = 0; id <= scif_info.maxid; id++) {
+		if (!_scifdev_alive(&scif_dev[id]))
+			continue;
+		online++;
+		/* Keep counting even once the caller's array is full */
+		if (filled < len)
+			nodes[filled++] = id;
+	}
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
+		scif_info.total, online, filled);
+	mutex_unlock(&scif_info.conflock);
+
+	return online;
+}
+EXPORT_SYMBOL_GPL(scif_get_node_ids);
+
+/*
+ * subsys_interface add_dev hook: forward the new peer device to the
+ * client's probe callback, when one is set. Always returns 0; the
+ * probe result is not propagated.
+ */
+static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
+{
+	struct scif_peer_dev *peer;
+	struct scif_client *cl;
+
+	cl = container_of(si, struct scif_client, si);
+	peer = container_of(dev, struct scif_peer_dev, dev);
+
+	if (!cl->probe)
+		return 0;
+
+	cl->probe(peer);
+	return 0;
+}
+
+/*
+ * subsys_interface remove_dev hook: forward the departing peer device
+ * to the client's remove callback, when one is set.
+ */
+static void scif_remove_client_dev(struct device *dev,
+				   struct subsys_interface *si)
+{
+	struct scif_peer_dev *peer;
+	struct scif_client *cl;
+
+	cl = container_of(si, struct scif_client, si);
+	peer = container_of(dev, struct scif_peer_dev, dev);
+
+	if (!cl->remove)
+		return;
+
+	cl->remove(peer);
+}
+
+void scif_client_unregister(struct scif_client *client)
+{
+	subsys_interface_unregister(&client->si); /* undoes scif_client_register() */
+}
+EXPORT_SYMBOL_GPL(scif_client_unregister);
+
+/*
+ * scif_client_register() - Register a client on the SCIF peer bus
+ * @client: Client supplying the name and the probe/remove callbacks
+ *
+ * Fills in the client's embedded subsys_interface and registers it.
+ * Returns the result of subsys_interface_register().
+ */
+int scif_client_register(struct scif_client *client)
+{
+	struct subsys_interface *intf = &client->si;
+
+	intf->subsys = &scif_peer_bus;
+	intf->name = client->name;
+	intf->remove_dev = scif_remove_client_dev;
+	intf->add_dev = scif_add_client_dev;
+
+	return subsys_interface_register(intf);
+}
+EXPORT_SYMBOL_GPL(scif_client_register);
diff --git a/kernel/drivers/misc/mic/scif/scif_debugfs.c b/kernel/drivers/misc/mic/scif/scif_debugfs.c
new file mode 100644
index 000000000..6884dad97
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_debugfs.c
@@ -0,0 +1,162 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../common/mic_dev.h"
+#include "scif_main.h"
+
+/* Debugfs parent dir */
+static struct dentry *scif_dbg;
+
+/*
+ * seq_file show routine: print the node summary and, when the scif_dev
+ * array exists, one line per node id up to maxid with its state.
+ */
+static int scif_dev_test(struct seq_file *s, void *unused)
+{
+	int id;
+
+	seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n",
+		   scif_info.total, scif_info.nodeid,
+		   scif_info.maxid);
+
+	if (scif_dev) {
+		seq_printf(s, "%-16s\t%-16s\n", "node_id", "state");
+
+		for (id = 0; id <= scif_info.maxid; id++) {
+			const char *state = _scifdev_alive(&scif_dev[id]) ?
+					    "Running" : "Offline";
+
+			seq_printf(s, "%-16d\t%-16s\n", scif_dev[id].node,
+				   state);
+		}
+	}
+	return 0;
+}
+
+static int scif_dev_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_dev_test, inode->i_private); /* show = scif_dev_test */
+}
+
+static int scif_dev_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file); /* pairs with single_open() in the open hook */
+}
+
+static const struct file_operations scif_dev_ops = { /* fops of the "scif_dev" debugfs file */
+	.owner   = THIS_MODULE,
+	.open    = scif_dev_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_dev_test_release
+};
+
+/*
+ * Dump one window into the seq_file: its header fields, every
+ * contiguous chunk, the pinned pages of a self window, and the
+ * scatterlist entries when the window has an sg table.
+ */
+static void scif_display_window(struct scif_window *window, struct seq_file *s)
+{
+	scif_pinned_pages_t pinned = window->pinned_pages;
+	struct scatterlist *sg;
+	int i;
+
+	seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
+		   window, window->type, window->temp, window->offset);
+	seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
+		   window->nr_pages, window->nr_contig_chunks, window->prot);
+	seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
+		   window->ref_count, window->magic, window->peer_window);
+	seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
+		   window->unreg_state, window->va_for_temp);
+
+	for (i = 0; i < window->nr_contig_chunks; i++) {
+		seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", i,
+			   window->dma_addr[i], window->num_pages[i]);
+	}
+
+	/* Pinned pages are only listed for self windows that have them */
+	if (pinned && window->type == SCIF_WINDOW_SELF) {
+		for (i = 0; i < window->nr_pages; i++) {
+			seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
+				   i, pinned->pages[i],
+				   page_address(pinned->pages[i]));
+		}
+	}
+
+	if (!window->st)
+		return;
+
+	for_each_sg(window->st->sgl, sg, window->st->nents, i) {
+		seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
+			   i, sg_dma_address(sg), sg_dma_len(sg));
+	}
+}
+
+/* Dump every window linked on @head into the seq_file, in list order */
+static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
+{
+	struct scif_window *win;
+
+	list_for_each_entry(win, head, list)
+		scif_display_window(win, s);
+}
+
+/*
+ * seq_file show routine: for every connected end point print its self
+ * and remote registered windows. The connected list is walked under
+ * connlock and each end point's window lists under its rma_lock.
+ */
+static int scif_rma_test(struct seq_file *s, void *unused)
+{
+	struct scif_endpt *ep;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each_entry(ep, &scif_info.connected, list) {
+		struct scif_endpt_rma_info *rma = &ep->rma_info;
+
+		seq_printf(s, "ep %p self windows\n", ep);
+		mutex_lock(&rma->rma_lock);
+		scif_display_all_windows(&rma->reg_list, s);
+		seq_printf(s, "ep %p remote windows\n", ep);
+		scif_display_all_windows(&rma->remote_reg_list, s);
+		mutex_unlock(&rma->rma_lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+	return 0;
+}
+
+static int scif_rma_test_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, scif_rma_test, inode->i_private); /* show = scif_rma_test */
+}
+
+static int scif_rma_test_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file); /* pairs with single_open() in the open hook */
+}
+
+static const struct file_operations scif_rma_ops = { /* fops of the "scif_rma" debugfs file */
+	.owner   = THIS_MODULE,
+	.open    = scif_rma_test_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = scif_rma_test_release
+};
+
+void __init scif_init_debugfs(void)
+{
+	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); /* NULL parent */
+	if (!scif_dbg) {
+		dev_err(scif_info.mdev.this_device,
+			"can't create debugfs dir scif\n");
+		return; /* no directory, so no entries are created */
+	}
+
+	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); /* read-only dump */
+	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); /* read-only dump */
+	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); /* NOTE(review): world-writable knob */
+	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); /* NOTE(review): world-writable knob */
+}
+
+void scif_exit_debugfs(void)
+{
+	debugfs_remove_recursive(scif_dbg); /* removes the dir made by scif_init_debugfs() */
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_dma.c b/kernel/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000..95a13c629
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1979 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+/*
+ * struct scif_dma_comp_cb - SCIF DMA completion callback
+ *
+ * @dma_completion_func: DMA completion callback, called with a cookie
+ * @cb_cookie: DMA completion callback cookie
+ * @temp_buf: Temporary buffer
+ * @temp_buf_to_free: Temporary buffer to be freed
+ * @is_cache: Is a kmem_cache allocated buffer
+ * @dst_offset: Destination registration offset
+ * @dst_window: Destination registration window
+ * @len: Length of the temp buffer
+ * @temp_phys: DMA address of the temp buffer
+ * @sdev: The SCIF device
+ * @header_padding: padding for cache line alignment
+ */
+struct scif_dma_comp_cb {
+	void (*dma_completion_func)(void *cookie);
+	void *cb_cookie;
+	u8 *temp_buf;
+	u8 *temp_buf_to_free; /* NOTE(review): may differ from temp_buf - confirm */
+	bool is_cache;
+	s64 dst_offset;
+	struct scif_window *dst_window;
+	size_t len;
+	dma_addr_t temp_phys;
+	struct scif_dev *sdev;
+	int header_padding;
+};
+
+/**
+ * struct scif_copy_work - Work for DMA copy
+ *
+ * @src_offset: Starting source offset
+ * @dst_offset: Starting destination offset
+ * @src_window: Starting src registered window
+ * @dst_window: Starting dst registered window
+ * @loopback: non-zero if this is a loopback DMA transfer
+ * @len: Length of the transfer
+ * @comp_cb: DMA copy completion callback
+ * @remote_dev: The remote SCIF peer device
+ * @fence_type: polling or interrupt based
+ * @ordered: is this a tail byte ordered DMA transfer
+ */
+struct scif_copy_work {
+	s64 src_offset;
+	s64 dst_offset;
+	struct scif_window *src_window;
+	struct scif_window *dst_window;
+	int loopback; /* an int used as a flag */
+	size_t len;
+	struct scif_dma_comp_cb   *comp_cb;
+	struct scif_dev	*remote_dev;
+	int fence_type;
+	bool ordered;
+};
+
+#ifndef list_entry_next
+/* Entry that follows @pos on the list linked through @member */
+#define list_entry_next(pos, member) \
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+#endif
+
+/**
+ * scif_reserve_dma_chan:
+ * @ep: Endpoint Descriptor.
+ *
+ * This routine reserves a DMA channel for a particular
+ * endpoint. All DMA transfers for an endpoint are always
+ * programmed on the same DMA channel.
+ *
+ * Returns 0 on success (also when nothing is reserved for a loopback
+ * end point on the management node) and -ENODEV when the device has no
+ * DMA channels.
+ */
+int scif_reserve_dma_chan(struct scif_endpt *ep)
+{
+	struct scif_dev *scifdev;
+	struct scif_hw_dev *sdev;
+	struct dma_chan *chan;
+
+	/* Loopback DMAs are not supported on the management node */
+	if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
+		return 0;
+
+	/* Any node other than node 0 uses the device for node 0 */
+	scifdev = scif_info.nodeid ? &scif_dev[0] : ep->remote_dev;
+	sdev = scifdev->sdev;
+	if (!sdev->num_dma_ch)
+		return -ENODEV;
+
+	/* Take the current channel and advance the index, wrapping around */
+	chan = sdev->dma_ch[scifdev->dma_ch_idx];
+	scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	ep->rma_info.dma_chan = chan;
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	return 0;
+}
+
+#ifdef CONFIG_MMU_NOTIFIER
+/**
+ * __scif_rma_destroy_tcw:
+ * @mmn: MMU notifier whose temporary cached window list is walked
+ * @ep: Unused; kept so that existing callers need no change
+ * @start: Start of the address range
+ * @len: Length of the address range
+ *
+ * This routine destroys the temporary cached windows that overlap
+ * [@start, @start + @len). The visible callers hold the end point's
+ * tc_lock around this call.
+ */
+static
+void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
+			    struct scif_endpt *ep,
+			    u64 start, u64 len)
+{
+	struct scif_window *window, *tmp;
+	u64 start_va, end_va;
+	u64 end = start + len;
+
+	/* Rejects an empty range (len == 0) as well as one that wraps */
+	if (end <= start)
+		return;
+
+	list_for_each_entry_safe(window, tmp, &mmn->tc_reg_list, list) {
+		start_va = window->va_for_temp;
+		end_va = start_va + (window->nr_pages << PAGE_SHIFT);
+		/*
+		 * The range ends at or before this window: stop walking.
+		 * NOTE(review): presumably the list is address ordered.
+		 */
+		if (start < start_va && end <= start_va)
+			break;
+		/* The range starts at or after the end of this window */
+		if (start >= end_va)
+			continue;
+		__scif_rma_destroy_tcw_helper(window);
+	}
+}
+
+static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
+{
+	struct scif_endpt *ep = mmn->ep; /* end point that owns this notifier */
+
+	spin_lock(&ep->rma_info.tc_lock); /* locked wrapper for the __ variant */
+	__scif_rma_destroy_tcw(mmn, ep, start, len);
+	spin_unlock(&ep->rma_info.tc_lock);
+}
+
+/*
+ * Destroy the temporary cached windows of every MMU notifier on the
+ * end point, over the range 0..ULONG_MAX. tc_lock is taken per
+ * notifier by scif_rma_destroy_tcw().
+ */
+static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct scif_mmu_notif *mmn, *next;
+
+	list_for_each_entry_safe(mmn, next, &ep->rma_info.mmn_list, list)
+		scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+}
+
+/*
+ * Same walk as scif_rma_destroy_tcw_ep(), but tc_lock is taken once
+ * around the whole walk instead of once per notifier.
+ */
+static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	struct scif_mmu_notif *mmn, *next;
+
+	spin_lock(&rma->tc_lock);
+	list_for_each_entry_safe(mmn, next, &rma->mmn_list, list)
+		__scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+	spin_unlock(&rma->tc_lock);
+}
+
+/*
+ * Decide whether a temporary window of @cur_bytes may be cached.
+ * A request larger than rma_tc_limit pages is refused. Otherwise, if
+ * adding it to the end point's cached total would pass the limit, the
+ * existing cached windows are destroyed first and caching is allowed.
+ */
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	size_t cur_pages = cur_bytes >> PAGE_SHIFT;
+
+	if (cur_pages > scif_info.rma_tc_limit)
+		return false;
+
+	if ((atomic_read(&ep->rma_info.tcw_total_pages) + cur_pages) <=
+			scif_info.rma_tc_limit)
+		return true;
+
+	/* Over the limit: make room by dropping what is cached */
+	dev_info(scif_info.mdev.this_device,
+		 "%s %d total=%d, current=%zu reached max\n",
+		 __func__, __LINE__,
+		 atomic_read(&ep->rma_info.tcw_total_pages),
+		 (1 + cur_pages));
+	scif_rma_destroy_tcw_invalid();
+	__scif_rma_destroy_tcw_ep(ep);
+	return true;
+}
+
+/*
+ * MMU notifier release hook: destroy every temporary cached window of
+ * this notifier and schedule the misc work item.
+ */
+static void scif_mmu_notifier_release(struct mmu_notifier *mn,
+				      struct mm_struct *mm)
+{
+	struct scif_mmu_notif *mmn =
+		container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+
+	scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
+	schedule_work(&scif_info.misc_work);
+}
+
+/*
+ * MMU notifier invalidate_page hook: destroy the temporary cached
+ * windows overlapping the PAGE_SIZE range that starts at @address.
+ */
+static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+					      struct mm_struct *mm,
+					      unsigned long address)
+{
+	struct scif_mmu_notif *mmn =
+		container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+
+	scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
+}
+
+/*
+ * MMU notifier invalidate_range_start hook: destroy the temporary
+ * cached windows overlapping [@start, @end).
+ */
+static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end)
+{
+	struct scif_mmu_notif *mmn =
+		container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
+
+	scif_rma_destroy_tcw(mmn, start, end - start);
+}
+
+static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						   struct mm_struct *mm,
+						   unsigned long start,
+						   unsigned long end)
+{
+	/*
+	 * Nothing to do here, everything needed was done in
+	 * invalidate_range_start, which already destroyed the temporary
+	 * cached windows covering this range.
+	 */
+}
+
+/* MMU notifier callbacks; hooks that are not listed stay NULL */
+static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
+	.release		= scif_mmu_notifier_release,
+	.invalidate_page	= scif_mmu_notifier_invalidate_page,
+	.invalidate_range_start	= scif_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end	= scif_mmu_notifier_invalidate_range_end,
+};
+
+/*
+ * Unregister, unlink and free every MMU notifier attached to the end
+ * point. The list is walked under mmn_lock.
+ */
+static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	struct scif_mmu_notif *mmn, *next;
+
+	mutex_lock(&rma->mmn_lock);
+	list_for_each_entry_safe(mmn, next, &rma->mmn_list, list) {
+		mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
+		list_del(&mmn->list);
+		kfree(mmn);
+	}
+	mutex_unlock(&rma->mmn_lock);
+}
+
+/*
+ * Initialise a freshly allocated notifier: empty list heads, the SCIF
+ * notifier ops, and the owning mm and end point.
+ */
+static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
+				   struct mm_struct *mm, struct scif_endpt *ep)
+{
+	INIT_LIST_HEAD(&mmn->list);
+	INIT_LIST_HEAD(&mmn->tc_reg_list);
+	mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
+	mmn->mm = mm;
+	mmn->ep = ep;
+}
+
+/*
+ * Look up the notifier registered for @mm on the end point's notifier
+ * list. Returns the first match or NULL when there is none.
+ */
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
+{
+	struct scif_mmu_notif *cur;
+
+	list_for_each_entry(cur, &rma->mmn_list, list) {
+		if (cur->mm == mm)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * scif_add_mmu_notifier - allocate a notifier tracker and register it
+ * @mm: address space requested by the caller
+ * @ep: endpoint whose mmn_list receives the new tracker
+ *
+ * Returns the new tracker on success, ERR_PTR(-ENOMEM) if the allocation
+ * fails or ERR_PTR(-EBUSY) if mmu_notifier_register() fails.
+ *
+ * The error codes must be negative: ERR_PTR() with a positive errno yields
+ * a small non-NULL pointer that IS_ERR() does not recognise, so callers
+ * would treat the failure as a valid tracker.
+ *
+ * NOTE(review): registration is done against current->mm and @mm itself is
+ * not read; presumably callers always pass current->mm - confirm.
+ * NOTE(review): the list insertion is not locked here; presumably the
+ * caller holds rma_info.mmn_lock - confirm.
+ */
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	struct scif_mmu_notif *mmn
+		 = kzalloc(sizeof(*mmn), GFP_KERNEL);
+
+	if (!mmn)
+		return ERR_PTR(-ENOMEM);
+
+	scif_init_mmu_notifier(mmn, current->mm, ep);
+	if (mmu_notifier_register(&mmn->ep_mmu_notifier,
+				  current->mm)) {
+		kfree(mmn);
+		return ERR_PTR(-EBUSY);
+	}
+	list_add(&mmn->list, &ep->rma_info.mmn_list);
+	return mmn;
+}
+
+/*
+ * Work handler: destroys invalidated temporary cached windows, then drains
+ * scif_info.mmu_notif_cleanup one endpoint at a time, destroying each
+ * endpoint's cached windows and unregistering its MMU notifiers.
+ *
+ * scif_info.rmalock only protects the list manipulation; it is dropped
+ * before the per-endpoint teardown, and every pass starts over from the
+ * head of the list.
+ */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+	struct scif_endpt *ep;
+
+	for (;;) {
+		scif_rma_destroy_tcw_invalid();
+
+		spin_lock(&scif_info.rmalock);
+		if (list_empty(&scif_info.mmu_notif_cleanup))
+			break;
+		ep = list_first_entry(&scif_info.mmu_notif_cleanup,
+				      struct scif_endpt, mmu_list);
+		list_del(&ep->mmu_list);
+		spin_unlock(&scif_info.rmalock);
+
+		scif_rma_destroy_tcw_ep(ep);
+		scif_ep_unregister_mmu_notifier(ep);
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/* True when the SCIF_RMA_USECACHE bit is set in @flags. */
+static bool scif_is_set_reg_cache(int flags)
+{
+	return (flags & SCIF_RMA_USECACHE) != 0;
+}
+#else
+/* #else variant: no notifier tracking, so the lookup never finds anything. */
+static struct scif_mmu_notif *
+scif_find_mmu_notifier(struct mm_struct *mm,
+		       struct scif_endpt_rma_info *rma)
+{
+	return NULL;
+}
+
+/* #else variant: nothing is allocated or registered; always returns NULL. */
+static struct scif_mmu_notif *
+scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
+{
+	return NULL;
+}
+
+/* #else variant: the work handler has nothing to clean up. */
+void scif_mmu_notif_handler(struct work_struct *work)
+{
+}
+
+/* #else variant: registration caching is never reported as requested. */
+static bool scif_is_set_reg_cache(int flags)
+{
+	return false;
+}
+
+/* #else variant: temporary windows are never eligible for caching. */
+static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
+{
+	return false;
+}
+#endif
+
+/**
+ * scif_register_temp:
+ * @epd: End Point Descriptor.
+ * @addr: virtual address to/from which to copy
+ * @len: length of range to copy
+ * @prot: protection flags; passed by reference to __scif_pin_pages() and
+ *	the resulting value is recorded on the pinned pages and the window.
+ * @out_offset: computed offset returned by reference.
+ * @out_window: allocated registered window returned by reference.
+ *
+ * Create a temporary registered window. The peer will not know about this
+ * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
+ *
+ * The pinned range starts at @addr rounded down to a page boundary and
+ * spans @len rounded up to a multiple of PAGE_SIZE.
+ *
+ * Return: 0 on success, in which case *@out_offset also carries the
+ * sub-page offset of @addr; a non-zero error code otherwise. If mapping
+ * the window fails, *@out_window is reset to NULL.
+ */
+static int
+scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
+		   off_t *out_offset, struct scif_window **out_window)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err;
+	scif_pinned_pages_t pinned_pages;
+	size_t aligned_len;
+
+	aligned_len = ALIGN(len, PAGE_SIZE);
+
+	err = __scif_pin_pages((void *)(addr & PAGE_MASK),
+			       aligned_len, &prot, 0, &pinned_pages);
+	if (err)
+		return err;
+
+	pinned_pages->prot = prot;
+
+	/*
+	 * Compute the offset for this registration.
+	 * NOTE(review): the cast assumes off_t and s64 have the same size -
+	 * confirm for every supported configuration.
+	 */
+	err = scif_get_window_offset(ep, 0, 0,
+				     aligned_len >> PAGE_SHIFT,
+				     (s64 *)out_offset);
+	if (err)
+		goto error_unpin;
+
+	/* Allocate and prepare self registration window */
+	*out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
+					*out_offset, true);
+	if (!*out_window) {
+		scif_free_window_offset(ep, NULL, *out_offset);
+		err = -ENOMEM;
+		goto error_unpin;
+	}
+
+	(*out_window)->pinned_pages = pinned_pages;
+	(*out_window)->nr_pages = pinned_pages->nr_pages;
+	(*out_window)->prot = pinned_pages->prot;
+
+	(*out_window)->va_for_temp = addr & PAGE_MASK;
+	err = scif_map_window(ep->remote_dev, *out_window);
+	if (err) {
+		/*
+		 * Something went wrong! Rollback. The pinned pages are not
+		 * unpinned explicitly on this path; presumably
+		 * scif_destroy_window() releases them - confirm.
+		 */
+		scif_destroy_window(ep, *out_window);
+		*out_window = NULL;
+	} else {
+		/* Fold the sub-page part of addr into the returned offset */
+		*out_offset |= (addr - (*out_window)->va_for_temp);
+	}
+	return err;
+error_unpin:
+	/* err is always non-zero when this label is reached */
+	if (err)
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	scif_unpin_pages(pinned_pages);
+	return err;
+}
+
+#define SCIF_DMA_TO (3 * HZ)
+
+/*
+ * scif_sync_dma - Program a DMA without an interrupt descriptor
+ *
+ * @sdev - The SCIF hardware device, used for error logging.
+ * @chan - DMA channel to be used.
+ * @sync_wait: Wait for DMA to complete?
+ *
+ * Submits a zero-length memcpy descriptor flagged DMA_PREP_FENCE. With
+ * @sync_wait false the channel is only kicked; with @sync_wait true the
+ * call waits via dma_sync_wait() for that descriptor to complete.
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
+			 bool sync_wait)
+{
+	struct dma_async_tx_descriptor *desc;
+	dma_cookie_t cookie;
+	int err;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+
+	desc = chan->device->device_prep_dma_memcpy(chan, 0, 0, 0,
+						    DMA_PREP_FENCE);
+	if (!desc) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+
+	cookie = desc->tx_submit(desc);
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+
+	if (!sync_wait) {
+		dma_async_issue_pending(chan);
+		return 0;
+	}
+
+	if (dma_sync_wait(chan, cookie) == DMA_COMPLETE)
+		return 0;
+
+	err = -EIO;
+	dev_err(&sdev->dev, "%s %d err %d\n",
+		__func__, __LINE__, err);
+	return err;
+}
+
+/* DMA completion callback: @arg is the struct completion to signal. */
+static void scif_dma_callback(void *arg)
+{
+	complete((struct completion *)arg);
+}
+
+#define SCIF_DMA_SYNC_WAIT true
+#define SCIF_DMA_POLL BIT(0)
+#define SCIF_DMA_INTR BIT(1)
+
+/*
+ * scif_async_dma - Program a DMA with an interrupt descriptor
+ *
+ * @sdev - The SCIF hardware device, used for error logging.
+ * @chan - DMA channel to be used.
+ *
+ * Submits a zero-length memcpy descriptor flagged DMA_PREP_INTERRUPT and
+ * DMA_PREP_FENCE, then sleeps for up to SCIF_DMA_TO until its callback
+ * signals completion.
+ *
+ * Return 0 on success and -errno on error (-EIO on timeout or when the
+ * cookie is not reported DMA_COMPLETE afterwards).
+ */
+static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	int err = 0;
+	struct dma_device *ddev;
+	struct dma_async_tx_descriptor *tx = NULL;
+	enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	DECLARE_COMPLETION_ONSTACK(done_wait);
+	dma_cookie_t cookie;
+	enum dma_status status;
+
+	if (!chan) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	ddev = chan->device;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	/* The completion was just declared, so this re-init looks redundant */
+	reinit_completion(&done_wait);
+	tx->callback = scif_dma_callback;
+	tx->callback_param = &done_wait;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		err = -ENOMEM;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	dma_async_issue_pending(chan);
+
+	/*
+	 * wait_for_completion_timeout() returns 0 on timeout.
+	 * NOTE(review): on timeout this function returns while the submitted
+	 * descriptor still points at the on-stack done_wait; a late callback
+	 * would appear to touch a dead stack frame - confirm and consider
+	 * terminating the transfer before returning.
+	 */
+	err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
+	if (!err) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+	/* Discard the remaining-jiffies value; only success matters here */
+	err = 0;
+	status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+	if (status != DMA_COMPLETE) {
+		err = -EIO;
+		dev_err(&sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto release;
+	}
+release:
+	return err;
+}
+
+/*
+ * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
+ * DMA channel via polling.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error (-EINVAL when @chan is NULL).
+ */
+static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	return chan ? scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT) : -EINVAL;
+}
+
+/*
+ * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
+ * DMA channel via interrupt based blocking wait.
+ *
+ * @sdev - The SCIF device
+ * @chan - DMA channel
+ * Return 0 on success and -errno on error (-EINVAL when @chan is NULL).
+ */
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
+{
+	return chan ? scif_async_dma(sdev, chan) : -EINVAL;
+}
+
+/**
+ * scif_rma_destroy_windows:
+ *
+ * This routine destroys all windows queued for cleanup on scif_info.rma.
+ *
+ * Each pass takes scif_info.rmalock, unlinks the first queued window, drops
+ * the lock and then drains the endpoint's DMA channel before destroying the
+ * window. The list walk restarts from the head after every window because
+ * the lock was released. May sleep.
+ */
+void scif_rma_destroy_windows(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma) {
+		window = list_entry(item, struct scif_window,
+				    list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+
+		/* Remove window from global list */
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		/*
+		 * The window may be torn down when there is no channel, the
+		 * device is no longer alive, or the drain returned 0.
+		 */
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan))
+			/* Mark the unregistration as complete */
+			window->unreg_state = OP_COMPLETED;
+		else
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		/*
+		 * NOTE(review): when the drain fails the window has already
+		 * been unlinked and is not destroyed here - confirm who
+		 * reclaims it.
+		 */
+		if (window->unreg_state == OP_COMPLETED) {
+			if (window->type == SCIF_WINDOW_SELF)
+				scif_destroy_window(ep, window);
+			else
+				scif_destroy_remote_window(window);
+			atomic_dec(&ep->rma_info.tw_refcount);
+		}
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/**
+ * scif_rma_destroy_tcw_invalid:
+ *
+ * This routine destroys temporary cached registered windows
+ * which have been queued for cleanup on scif_info.rma_tc.
+ *
+ * Each pass unlinks the first queued window under scif_info.rmalock, drops
+ * that lock, and then drains DMA and destroys the window while holding the
+ * endpoint's rma_lock mutex. The walk restarts from the list head after
+ * every window. May sleep.
+ */
+void scif_rma_destroy_tcw_invalid(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep;
+	struct dma_chan *chan;
+
+	might_sleep();
+restart:
+	spin_lock(&scif_info.rmalock);
+	list_for_each_safe(item, tmp, &scif_info.rma_tc) {
+		window = list_entry(item, struct scif_window, list);
+		ep = (struct scif_endpt *)window->ep;
+		chan = ep->rma_info.dma_chan;
+		list_del_init(&window->list);
+		spin_unlock(&scif_info.rmalock);
+		mutex_lock(&ep->rma_info.rma_lock);
+		/*
+		 * Destroy when there is no channel, the device is no longer
+		 * alive, or the drain returned 0 (success).
+		 */
+		if (!chan || !scifdev_alive(ep) ||
+		    !scif_drain_dma_intr(ep->remote_dev->sdev,
+					 ep->rma_info.dma_chan)) {
+			/* Give the window's pages back to the cache budget */
+			atomic_sub(window->nr_pages,
+				   &ep->rma_info.tcw_total_pages);
+			scif_destroy_window(ep, window);
+			atomic_dec(&ep->rma_info.tcw_refcount);
+		} else {
+			/*
+			 * NOTE(review): the window stays unlinked and is not
+			 * destroyed on this path - confirm who reclaims it.
+			 */
+			dev_warn(&ep->remote_dev->sdev->dev,
+				 "DMA engine hung?\n");
+		}
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto restart;
+	}
+	spin_unlock(&scif_info.rmalock);
+}
+
+/*
+ * Translate window offset @off into a kernel virtual address using the
+ * window's pinned pages. Only SCIF_WINDOW_SELF windows can be translated;
+ * any other window type yields NULL. @len is unused.
+ */
+static inline
+void *_get_local_va(off_t off, struct scif_window *window, size_t len)
+{
+	int page_nr = (off - window->offset) >> PAGE_SHIFT;
+	off_t page_off = off & ~PAGE_MASK;
+
+	if (window->type != SCIF_WINDOW_SELF)
+		return NULL;
+
+	return page_address(window->pinned_pages->pages[page_nr]) + page_off;
+}
+
+/*
+ * Map @len bytes at window offset @off through scif_ioremap() and return
+ * the resulting virtual address.
+ */
+static inline
+void *ioremap_remote(off_t off, struct scif_window *window,
+		     size_t len, struct scif_dev *dev,
+		     struct scif_window_iter *iter)
+{
+	dma_addr_t dma_addr = scif_off_to_dma_addr(window, off, NULL, iter);
+	bool strip_aper_base = !scifdev_self(dev) &&
+			       window->type == SCIF_WINDOW_PEER &&
+			       dev->sdev->aper && !dev->sdev->card_rel_da;
+
+	/*
+	 * If the DMA address is not card relative then we need the DMA
+	 * addresses to be an offset into the bar. The aperture base was already
+	 * added so subtract it here since scif_ioremap is going to add it again
+	 */
+	if (strip_aper_base)
+		dma_addr -= dev->sdev->aper->pa;
+
+	return scif_ioremap(dma_addr, len, dev);
+}
+
+/*
+ * Undo an ioremap_remote() mapping of @size bytes at @virt, using the remote
+ * device recorded in @work.
+ */
+static inline void
+iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
+{
+	scif_iounmap(virt, size, work->remote_dev);
+}
+
+/*
+ * Copy @count bytes to I/O memory such that the final byte is written last.
+ *
+ * Takes care of ordering issue caused by
+ * 1. Hardware:  Only in the case of cpu copy from mgmt node to card
+ * because of WC memory.
+ * 2. Software: If memcpy reorders copy instructions for optimization.
+ * This could happen at both mgmt node and card.
+ */
+static inline void
+scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
+{
+	size_t last;
+
+	if (count == 0)
+		return;
+
+	/* Everything except the final byte goes through the bulk copy */
+	last = count - 1;
+	memcpy_toio((void __iomem __force *)dst, src, last);
+	/* Order the last byte with the previous stores */
+	wmb();
+	dst[last] = src[last];
+}
+
+/* Copy to I/O memory, using the last-byte-last variant when @ordered. */
+static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
+					   size_t count, bool ordered)
+{
+	if (!ordered) {
+		memcpy_toio((void __iomem __force *)dst, src, count);
+		return;
+	}
+	scif_ordered_memcpy_toio(dst, src, count);
+}
+
+/* Copy @count bytes from I/O memory such that the final byte is read last. */
+static inline
+void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
+{
+	size_t last;
+
+	if (count == 0)
+		return;
+
+	/* Everything except the final byte goes through the bulk copy */
+	last = count - 1;
+	memcpy_fromio(dst, (void __iomem __force *)src, last);
+	/* Order the last byte with the previous loads */
+	rmb();
+	dst[last] = src[last];
+}
+
+/* Copy from I/O memory, using the last-byte-last variant when @ordered. */
+static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
+					     size_t count, bool ordered)
+{
+	if (!ordered) {
+		memcpy_fromio(dst, (void __iomem __force *)src, count);
+		return;
+	}
+	scif_ordered_memcpy_fromio(dst, src, count);
+}
+
+#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+/*
+ * scif_off_to_dma_addr:
+ * Obtain the dma_addr given the window and the offset.
+ * @window: Registered window.
+ * @off: Window offset.
+ * @nr_bytes: Optional. Return the number of contiguous bytes from @off till
+ * the end of the page or contiguous chunk that contains it.
+ * @iter: Optional. Supplies the chunk index and chunk start offset at which
+ * the search begins, and is updated with the chunk that was found.
+ * The nr_bytes provides the callee an estimate of the maximum possible
+ * DMA xfer possible while the iterator provides faster lookups
+ * for the next iteration.
+ *
+ * Returns the DMA address, or SCIF_RMA_ERROR_CODE when @off is not covered
+ * by any chunk that was searched.
+ */
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes, struct scif_window_iter *iter)
+{
+	int i, page_nr;
+	s64 start, end;
+	off_t page_off;
+
+	/* One chunk per page: index the dma_addr array directly by page */
+	if (window->nr_pages == window->nr_contig_chunks) {
+		page_nr = (off - window->offset) >> PAGE_SHIFT;
+		page_off = off & ~PAGE_MASK;
+
+		if (nr_bytes)
+			*nr_bytes = PAGE_SIZE - page_off;
+		return window->dma_addr[page_nr] | page_off;
+	}
+	/* Resume from the iterator position, else from the first chunk */
+	if (iter) {
+		i = iter->index;
+		start = iter->offset;
+	} else {
+		i =  0;
+		start =  window->offset;
+	}
+	for (; i < window->nr_contig_chunks; i++) {
+		end = start + (window->num_pages[i] << PAGE_SHIFT);
+		if (off >= start && off < end) {
+			if (iter) {
+				iter->index = i;
+				iter->offset = start;
+			}
+			if (nr_bytes)
+				*nr_bytes = end - off;
+			return (window->dma_addr[i] + (off - start));
+		}
+		/* Advance to the start of the next chunk (same value as end) */
+		start += (window->num_pages[i] << PAGE_SHIFT);
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d BUG. Addr not found? window %p off 0x%llx\n",
+		__func__, __LINE__, window, off);
+	return SCIF_RMA_ERROR_CODE;
+}
+
+/*
+ * Copy between rma window and temporary buffer
+ *
+ * @offset: starting window offset
+ * @window: window containing @offset; following list entries are used when
+ *	the copy runs past the end of a window
+ * @temp: temporary buffer
+ * @rem_len: number of bytes to copy
+ * @to_temp: true copies window -> @temp, false copies @temp -> window
+ *
+ * The copy proceeds page by page. It stops silently, without reporting an
+ * error, as soon as _get_local_va() cannot translate an offset.
+ */
+static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
+				    u8 *temp, size_t rem_len, bool to_temp)
+{
+	void *window_virt;
+	size_t loop_len;
+	int offset_in_page;
+	s64 end_offset;
+
+	/* First chunk: from offset up to the end of its page at most */
+	offset_in_page = offset & ~PAGE_MASK;
+	loop_len = PAGE_SIZE - offset_in_page;
+
+	if (rem_len < loop_len)
+		loop_len = rem_len;
+
+	window_virt = _get_local_va(offset, window, loop_len);
+	if (!window_virt)
+		return;
+	if (to_temp)
+		memcpy(temp, window_virt, loop_len);
+	else
+		memcpy(window_virt, temp, loop_len);
+
+	offset += loop_len;
+	temp += loop_len;
+	rem_len -= loop_len;
+
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+	/* Remaining chunks start page aligned: copy up to a page at a time */
+	while (rem_len) {
+		/* Crossed the end of this window: continue in the next one */
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		loop_len = min(PAGE_SIZE, rem_len);
+		window_virt = _get_local_va(offset, window, loop_len);
+		if (!window_virt)
+			return;
+		if (to_temp)
+			memcpy(temp, window_virt, loop_len);
+		else
+			memcpy(window_virt, temp, loop_len);
+		offset	+= loop_len;
+		temp	+= loop_len;
+		rem_len	-= loop_len;
+	}
+}
+
+/**
+ * scif_rma_completion_cb:
+ * @data: the struct scif_dma_comp_cb describing the finished transfer
+ *
+ * RMA interrupt completion callback. Copies the bounce buffer into the
+ * destination window when one is recorded, unmaps the bounce buffer and
+ * frees it.
+ *
+ * NOTE(review): the scif_dma_comp_cb itself is not freed here; presumably
+ * the submitter owns it - confirm.
+ */
+static void scif_rma_completion_cb(void *data)
+{
+	struct scif_dma_comp_cb *comp_cb = data;
+
+	/* Data landed in the temp buffer: copy it out to the local window */
+	if (comp_cb->dst_window)
+		scif_rma_local_cpu_copy(comp_cb->dst_offset,
+					comp_cb->dst_window,
+					comp_cb->temp_buf +
+					comp_cb->header_padding,
+					comp_cb->len, false);
+	scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
+			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	/* Free the temp buffer with the allocator it came from */
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache,
+				comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+}
+
+/*
+ * Copies between temporary buffer and offsets provided in work
+ *
+ * @work: copy description; the window/offset pair used is the destination
+ *	when @src_local is true and the source otherwise
+ * @temp: temporary (bounce) buffer holding or receiving the data
+ * @chan: DMA channel to program
+ * @src_local: true copies @temp -> window, false copies window -> @temp
+ *
+ * The transfer is split in three parts:
+ *   head - CPU copy up to the next L1 cache line boundary of the offset,
+ *   body - DMA of whole cache lines,
+ *   tail - CPU copy of the remaining bytes smaller than a cache line.
+ * Finally a zero-length interrupt descriptor is queued whose callback,
+ * scif_rma_completion_cb(), releases the temporary buffer.
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int
+scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
+				 u8 *temp, struct dma_chan *chan,
+				 bool src_local)
+{
+	struct scif_dma_comp_cb *comp_cb = work->comp_cb;
+	dma_addr_t window_dma_addr, temp_dma_addr;
+	dma_addr_t temp_phys = comp_cb->temp_phys;
+	size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
+	int offset_in_ca, ret = 0;
+	s64 end_offset, offset;
+	struct scif_window *window;
+	void *window_virt_addr;
+	size_t tail_len;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	if (src_local) {
+		offset = work->dst_offset;
+		window = work->dst_window;
+	} else {
+		offset = work->src_offset;
+		window = work->src_window;
+	}
+
+	/* Head: CPU copy until the offset is cache line aligned */
+	offset_in_ca = offset & (L1_CACHE_BYTES - 1);
+	if (offset_in_ca) {
+		loop_len = L1_CACHE_BYTES - offset_in_ca;
+		loop_len = min(loop_len, remaining_len);
+		window_virt_addr = ioremap_remote(offset, window,
+						  loop_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		/* Only order the copy if the head is the whole transfer */
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						loop_len,
+						work->ordered &&
+						!(remaining_len - loop_len));
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  loop_len, work->ordered &&
+						  !(remaining_len - loop_len));
+		iounmap_remote(window_virt_addr, loop_len, work);
+
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	end_offset = window->offset +
+		(window->nr_pages << PAGE_SHIFT);
+
+	/* Body: whole cache lines go through the DMA engine */
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		/* Crossed the end of this window: continue in the next one */
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		if (scif_is_mgmt_node())
+			temp_dma_addr = temp_phys;
+		else
+			/* Fix if we ever enable IOMMU on the card */
+			temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
+		window_dma_addr = scif_off_to_dma_addr(window, offset,
+						       &nr_contig_bytes,
+						       NULL);
+		loop_len = min(nr_contig_bytes, remaining_len);
+		if (src_local) {
+			if (work->ordered && !tail_len &&
+			    !(remaining_len - loop_len) &&
+			    loop_len != L1_CACHE_BYTES) {
+				/*
+				 * Break up the last chunk of the transfer into
+				 * two steps if there is no tail, to guarantee
+				 * DMA ordering. Step 1 is submitted with
+				 * DMA_PREP_FENCE so that the last cache line,
+				 * sent in step 2, is updated last.
+				 */
+				/* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len -
+							    L1_CACHE_BYTES,
+							    DMA_PREP_FENCE);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+				offset += (loop_len - L1_CACHE_BYTES);
+				temp_dma_addr += (loop_len - L1_CACHE_BYTES);
+				window_dma_addr += (loop_len - L1_CACHE_BYTES);
+				remaining_len -= (loop_len - L1_CACHE_BYTES);
+				loop_len = remaining_len;
+
+				/* Step 2) DMA: L1_CACHE_BYTES */
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			} else {
+				tx =
+				dev->device_prep_dma_memcpy(chan,
+							    window_dma_addr,
+							    temp_dma_addr,
+							    loop_len, 0);
+				if (!tx) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				cookie = tx->tx_submit(tx);
+				if (dma_submit_error(cookie)) {
+					ret = -ENOMEM;
+					goto err;
+				}
+				dma_async_issue_pending(chan);
+			}
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
+					window_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		offset += loop_len;
+		temp += loop_len;
+		temp_phys += loop_len;
+		remaining_len -= loop_len;
+	}
+	/* Tail: CPU copy of the bytes that do not fill a cache line */
+	if (tail_len) {
+		if (offset == end_offset) {
+			window = list_entry_next(window, list);
+			end_offset = window->offset +
+				(window->nr_pages << PAGE_SHIFT);
+		}
+		window_virt_addr = ioremap_remote(offset, window, tail_len,
+						  work->remote_dev,
+						  NULL);
+		if (!window_virt_addr)
+			return -ENOMEM;
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed
+		 * to guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_intr(rdev->sdev, chan);
+			if (ret) {
+				/* Do not leak the mapping taken above */
+				iounmap_remote(window_virt_addr, tail_len,
+					       work);
+				return ret;
+			}
+		}
+		if (src_local)
+			scif_unaligned_cpy_toio(window_virt_addr, temp,
+						tail_len, work->ordered);
+		else
+			scif_unaligned_cpy_fromio(temp, window_virt_addr,
+						  tail_len, work->ordered);
+		iounmap_remote(window_virt_addr, tail_len, work);
+	}
+	/* Zero-length interrupt descriptor: triggers the completion callback */
+	tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	tx->callback = &scif_rma_completion_cb;
+	tx->callback_param = comp_cb;
+	cookie = tx->tx_submit(tx);
+
+	if (dma_submit_error(cookie)) {
+		ret = -ENOMEM;
+		return ret;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * _scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ *
+ * @work: source/destination windows, offsets, length and ordering request
+ * @chan: DMA channel to program
+ *
+ * Every chunk is limited to what is contiguous in both the source and the
+ * destination window. Descriptors are only issued to the hardware here for
+ * the ordered final chunk.
+ * NOTE(review): on the other paths nothing calls dma_async_issue_pending();
+ * presumably the caller kicks the channel afterwards - confirm.
+ *
+ * Return 0 on success and -ENOMEM if a descriptor cannot be prepared or
+ * submitted.
+ */
+static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					   struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	while (remaining_len) {
+		/* Move on to the next window once the current one is used up */
+		if (src_offset == end_src_offset) {
+			src_window = list_entry_next(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_entry_next(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps to ensure that the last byte in step 2 is
+			 * updated last.
+			 */
+			/*
+			 * Step 1) DMA: Body Length - 1
+			 * (a zero-length descriptor when loop_len is 1)
+			 */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len - 1,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			src_offset += (loop_len - 1);
+			dst_offset += (loop_len - 1);
+			src_dma_addr += (loop_len - 1);
+			dst_dma_addr += (loop_len - 1);
+			remaining_len -= (loop_len - 1);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: 1 BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+					src_dma_addr, loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_dma_copy_aligned:
+ *
+ * Traverse all the windows and perform DMA copy.
+ *
+ * @work: source/destination windows, offsets, length and ordering request
+ * @chan: DMA channel to program
+ *
+ * The transfer is split in three parts:
+ *   head - CPU copy up to the next L1 cache line boundary of the source
+ *	    offset,
+ *   body - DMA of whole cache lines,
+ *   tail - CPU copy of the remaining bytes smaller than a cache line.
+ * Only the source offset is tested for alignment; presumably the caller
+ * guarantees that source and destination share the same cache line offset
+ * - confirm.
+ *
+ * Return 0 on success and -errno on error.
+ */
+static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
+					  struct dma_chan *chan)
+{
+	dma_addr_t src_dma_addr, dst_dma_addr;
+	size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
+	size_t dst_contig_bytes = 0;
+	int src_cache_off;
+	s64 end_src_offset, end_dst_offset;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	void *src_virt, *dst_virt;
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_device *dev = chan->device;
+	dma_cookie_t cookie;
+
+	remaining_len = work->len;
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	if (src_cache_off != 0) {
+		/* Head */
+		loop_len = L1_CACHE_BYTES - src_cache_off;
+		loop_len = min(loop_len, remaining_len);
+		/* These two DMA addresses are not used by the CPU copy below */
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			/* Only remote mappings need to be undone */
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+		/* Only order the copy if the head is the whole transfer */
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						remaining_len == loop_len ?
+						work->ordered : false);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
+						  remaining_len == loop_len ?
+						  work->ordered : false);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+
+	end_src_offset = src_window->offset +
+		(src_window->nr_pages << PAGE_SHIFT);
+	end_dst_offset = dst_window->offset +
+		(dst_window->nr_pages << PAGE_SHIFT);
+	/* Body: whole cache lines via DMA; the remainder becomes the tail */
+	tail_len = remaining_len & (L1_CACHE_BYTES - 1);
+	remaining_len -= tail_len;
+	while (remaining_len) {
+		/* Move on to the next window once the current one is used up */
+		if (src_offset == end_src_offset) {
+			src_window = list_entry_next(src_window, list);
+			end_src_offset = src_window->offset +
+				(src_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_entry_next(dst_window, list);
+			end_dst_offset = dst_window->offset +
+				(dst_window->nr_pages << PAGE_SHIFT);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+
+		/* compute dma addresses for transfer */
+		src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
+						    &src_contig_bytes,
+						    &src_win_iter);
+		dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
+						    &dst_contig_bytes,
+						    &dst_win_iter);
+		loop_len = min(src_contig_bytes, dst_contig_bytes);
+		loop_len = min(loop_len, remaining_len);
+		if (work->ordered && !tail_len &&
+		    !(remaining_len - loop_len)) {
+			/*
+			 * Break up the last chunk of the transfer into two
+			 * steps if there is no tail, to guarantee DMA
+			 * ordering. Step 1 is submitted with DMA_PREP_FENCE
+			 * so that the last cache line, sent in step 2, is
+			 * updated last.
+			 */
+			/*
+			 * Step 1) DMA: Body Length - L1_CACHE_BYTES.
+			 * (a zero-length descriptor when loop_len equals
+			 * L1_CACHE_BYTES)
+			 */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len -
+							 L1_CACHE_BYTES,
+							 DMA_PREP_FENCE);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+			src_offset += (loop_len - L1_CACHE_BYTES);
+			dst_offset += (loop_len - L1_CACHE_BYTES);
+			src_dma_addr += (loop_len - L1_CACHE_BYTES);
+			dst_dma_addr += (loop_len - L1_CACHE_BYTES);
+			remaining_len -= (loop_len - L1_CACHE_BYTES);
+			loop_len = remaining_len;
+
+			/* Step 2) DMA: L1_CACHE_BYTES */
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		} else {
+			tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
+							 src_dma_addr,
+							 loop_len, 0);
+			if (!tx) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			cookie = tx->tx_submit(tx);
+			if (dma_submit_error(cookie)) {
+				ret = -ENOMEM;
+				goto err;
+			}
+			dma_async_issue_pending(chan);
+		}
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+	}
+	/* Tail: CPU copy of the bytes that do not fill a cache line */
+	remaining_len = tail_len;
+	if (remaining_len) {
+		loop_len = remaining_len;
+		if (src_offset == end_src_offset)
+			src_window = list_entry_next(src_window, list);
+		if (dst_offset == end_dst_offset)
+			dst_window = list_entry_next(dst_window, list);
+
+		/* These two DMA addresses are not used by the CPU copy below */
+		src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
+		dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
+		/*
+		 * The CPU copy for the tail bytes must be initiated only once
+		 * previous DMA transfers for this endpoint have completed to
+		 * guarantee ordering.
+		 */
+		if (work->ordered) {
+			struct scif_dev *rdev = work->remote_dev;
+
+			ret = scif_drain_dma_poll(rdev->sdev, chan);
+			if (ret)
+				return ret;
+		}
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!src_virt)
+			return -ENOMEM;
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev, NULL);
+		if (!dst_virt) {
+			/* Only remote mappings need to be undone */
+			if (src_window->type != SCIF_WINDOW_SELF)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+
+		if (src_window->type == SCIF_WINDOW_SELF)
+			scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
+						work->ordered);
+		else
+			scif_unaligned_cpy_fromio(dst_virt, src_virt,
+						  loop_len, work->ordered);
+		if (src_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(src_virt, loop_len, work);
+
+		if (dst_window->type != SCIF_WINDOW_SELF)
+			iounmap_remote(dst_virt, loop_len, work);
+		remaining_len -= loop_len;
+	}
+	return ret;
+err:
+	dev_err(scif_info.mdev.this_device,
+		"%s %d Desc Prog Failed ret %d\n",
+		__func__, __LINE__, ret);
+	return ret;
+}
+
+/*
+ * scif_rma_list_cpu_copy:
+ *
+ * Traverse all the windows and perform CPU copy.
+ */
+static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
+{
+	struct scif_window *src_window = work->src_window;
+	struct scif_window *dst_window = work->dst_window;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	struct scif_window_iter src_win_iter;
+	struct scif_window_iter dst_win_iter;
+	size_t remaining_len = work->len;
+	s64 end_src_offset, end_dst_offset;
+	int src_page_off, dst_page_off;
+	void *src_virt, *dst_virt;
+	size_t loop_len;
+
+	scif_init_window_iter(src_window, &src_win_iter);
+	scif_init_window_iter(dst_window, &dst_win_iter);
+	while (remaining_len) {
+		/*
+		 * Never cross a page boundary on either side: the chunk is
+		 * bounded by whichever offset sits further into its page.
+		 */
+		src_page_off = src_offset & ~PAGE_MASK;
+		dst_page_off = dst_offset & ~PAGE_MASK;
+		loop_len = min(PAGE_SIZE -
+			       max(src_page_off, dst_page_off),
+			       remaining_len);
+
+		/* SELF windows use _get_local_va(), others ioremap_remote() */
+		if (src_window->type == SCIF_WINDOW_SELF)
+			src_virt = _get_local_va(src_offset, src_window,
+						 loop_len);
+		else
+			src_virt = ioremap_remote(src_offset, src_window,
+						  loop_len,
+						  work->remote_dev,
+						  &src_win_iter);
+		if (!src_virt)
+			return -ENOMEM;
+
+		if (dst_window->type == SCIF_WINDOW_SELF)
+			dst_virt = _get_local_va(dst_offset, dst_window,
+						 loop_len);
+		else
+			dst_virt = ioremap_remote(dst_offset, dst_window,
+						  loop_len,
+						  work->remote_dev,
+						  &dst_win_iter);
+		if (!dst_virt) {
+			/* Release the source mapping taken just above */
+			if (src_window->type == SCIF_WINDOW_PEER)
+				iounmap_remote(src_virt, loop_len, work);
+			return -ENOMEM;
+		}
+
+		if (work->loopback)
+			memcpy(dst_virt, src_virt, loop_len);
+		else if (src_window->type == SCIF_WINDOW_SELF)
+			memcpy_toio((void __iomem __force *)dst_virt,
+				    src_virt, loop_len);
+		else
+			memcpy_fromio(dst_virt,
+				      (void __iomem __force *)src_virt,
+				      loop_len);
+
+		if (src_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(src_virt, loop_len, work);
+		if (dst_window->type == SCIF_WINDOW_PEER)
+			iounmap_remote(dst_virt, loop_len, work);
+
+		src_offset += loop_len;
+		dst_offset += loop_len;
+		remaining_len -= loop_len;
+		if (!remaining_len)
+			break;
+
+		/*
+		 * More to copy: leave a window once its end has been reached.
+		 */
+		end_src_offset = src_window->offset +
+			(src_window->nr_pages << PAGE_SHIFT);
+		end_dst_offset = dst_window->offset +
+			(dst_window->nr_pages << PAGE_SHIFT);
+		if (src_offset == end_src_offset) {
+			src_window = list_entry_next(src_window, list);
+			scif_init_window_iter(src_window, &src_win_iter);
+		}
+		if (dst_offset == end_dst_offset) {
+			dst_window = list_entry_next(dst_window, list);
+			scif_init_window_iter(dst_window, &dst_win_iter);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Dispatch @work: aligned DMA helpers when they apply, CPU copy for loopback,
+ * else DMA via a temporary bounce buffer. Returns 0 or a negative errno.
+ */
+static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
+					  struct scif_copy_work *work,
+					  struct dma_chan *chan, off_t loffset)
+{
+	int src_cache_off, dst_cache_off, err = -ENOMEM;
+	s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
+	u8 *temp = NULL;
+	bool src_local;
+	struct scif_dma_comp_cb *comp_cb;
+
+	if (is_dma_copy_aligned(chan->device, 1, 1, 1))
+		return _scif_rma_list_dma_copy_aligned(work, chan);
+
+	src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
+	dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
+	if (dst_cache_off == src_cache_off)
+		return scif_rma_list_dma_copy_aligned(work, chan);
+	if (work->loopback)
+		return scif_rma_list_cpu_copy(work);
+
+	src_local = work->src_window->type == SCIF_WINDOW_SELF;
+	comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
+	if (!comp_cb)
+		return -ENOMEM;
+
+	work->comp_cb = comp_cb;
+	comp_cb->cb_cookie = comp_cb;
+	comp_cb->dma_completion_func = &scif_rma_completion_cb;
+
+	if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
+		comp_cb->is_cache = false;
+		/* Allocate padding bytes to align to a cache line */
+		temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+		/* kmalloc(..) does not guarantee cache line alignment */
+		if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
+			temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
+	} else {
+		comp_cb->is_cache = true;
+		temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
+		if (!temp)
+			goto free_comp_cb;
+		comp_cb->temp_buf_to_free = temp;
+	}
+
+	if (src_local) {
+		/* Stage the local data at the destination's cache offset */
+		temp += dst_cache_off;
+		scif_rma_local_cpu_copy(work->src_offset, work->src_window,
+					temp, work->len, true);
+	} else {
+		/* Widen the read to whole cache lines around the source */
+		comp_cb->dst_window = work->dst_window;
+		comp_cb->dst_offset = work->dst_offset;
+		work->src_offset = work->src_offset - src_cache_off;
+		comp_cb->len = work->len;
+		work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
+		comp_cb->header_padding = src_cache_off;
+	}
+	comp_cb->temp_buf = temp;
+
+	err = scif_map_single(&comp_cb->temp_phys, temp,
+			      work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
+	if (err)
+		goto free_temp_buf;
+	comp_cb->sdev = work->remote_dev;
+	err = scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local);
+	if (err < 0)
+		goto unmap_temp_buf;
+	if (!src_local)
+		work->fence_type = SCIF_DMA_INTR;
+	return 0;
+unmap_temp_buf:
+	scif_unmap_single(comp_cb->temp_phys, work->remote_dev,
+			  SCIF_KMEM_UNALIGNED_BUF_SIZE);
+free_temp_buf:
+	if (comp_cb->is_cache)
+		kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
+	else
+		kfree(comp_cb->temp_buf_to_free);
+free_comp_cb:
+	work->comp_cb = NULL;
+	kfree(comp_cb);
+	return err;
+}
+
+/**
+ * scif_rma_copy:
+ * @epd: end point descriptor.
+ * @loffset: offset in local registered address space to/from which to copy
+ * @addr: user virtual address to/from which to copy
+ * @len: length of range to copy
+ * @roffset: offset in remote registered address space to/from which to copy
+ * @flags: flags
+ * @dir: LOCAL->REMOTE or vice versa.
+ * @last_chunk: true if this is the last chunk of a larger transfer
+ *
+ * Validate parameters, check if src/dst registered ranges requested for copy
+ * are valid and initiate either CPU or DMA copy.
+ */
+static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
+			 size_t len, off_t roffset, int flags,
+			 enum scif_rma_dir dir, bool last_chunk)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req remote_req;
+	struct scif_rma_req req;
+	struct scif_window *local_window = NULL;
+	struct scif_window *remote_window = NULL;
+	struct scif_copy_work copy_work;
+	bool loopback;
+	int err = 0;
+	struct dma_chan *chan;
+	struct scif_mmu_notif *mmn = NULL;
+	bool cache = false;
+	struct device *spdev;
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
+				SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
+		return -EINVAL;
+
+	loopback = scifdev_self(ep->remote_dev) ? true : false;
+	copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
+				SCIF_DMA_POLL : 0;
+	copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
+
+	/* Use CPU for Mgmt node <-> Mgmt node copies */
+	if (loopback && scif_is_mgmt_node()) {
+		flags |= SCIF_RMA_USECPU;
+		copy_work.fence_type = 0x0;
+	}
+
+	cache = scif_is_set_reg_cache(flags);
+
+	remote_req.out_window = &remote_window;
+	remote_req.offset = roffset;
+	remote_req.nr_bytes = len;
+	/*
+	 * If transfer is from local to remote then the remote window
+	 * must be writeable and vice versa.
+	 */
+	remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
+	remote_req.type = SCIF_WINDOW_PARTIAL;
+	remote_req.head = &ep->rma_info.remote_reg_list;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev))
+		return PTR_ERR(spdev);
+
+	if (addr && cache) {
+		mutex_lock(&ep->rma_info.mmn_lock);
+		mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
+		/* Keep the new notifier: it is error-checked and used below */
+		if (!mmn)
+			mmn = scif_add_mmu_notifier(current->mm, ep);
+		mutex_unlock(&ep->rma_info.mmn_lock);
+		if (IS_ERR(mmn)) {
+			scif_put_peer_dev(spdev);
+			return PTR_ERR(mmn);
+		}
+		cache = cache && !scif_rma_tc_can_cache(ep, len);
+	}
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (addr) {
+		req.out_window = &local_window;
+		req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK), PAGE_SIZE);
+		req.va_for_temp = addr & PAGE_MASK;
+		req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
+			    VM_READ : VM_WRITE | VM_READ);
+		/* Does a valid local window exist? */
+		if (mmn) {
+			spin_lock(&ep->rma_info.tc_lock);
+			req.head = &mmn->tc_reg_list;
+			err = scif_query_tcw(ep, &req);
+			spin_unlock(&ep->rma_info.tc_lock);
+		}
+		if (!mmn || err) {
+			err = scif_register_temp(epd, req.va_for_temp,
+						 req.nr_bytes, req.prot,
+						 &loffset, &local_window);
+			if (err) {
+				mutex_unlock(&ep->rma_info.rma_lock);
+				goto error;
+			}
+			if (!cache)
+				goto skip_cache;
+			atomic_inc(&ep->rma_info.tcw_refcount);
+			atomic_add_return(local_window->nr_pages,
+					  &ep->rma_info.tcw_total_pages);
+			if (mmn) {
+				spin_lock(&ep->rma_info.tc_lock);
+				scif_insert_tcw(local_window,
+						&mmn->tc_reg_list);
+				spin_unlock(&ep->rma_info.tc_lock);
+			}
+		}
+skip_cache:
+		loffset = local_window->offset +
+				(addr - local_window->va_for_temp);
+	} else {
+		req.out_window = &local_window;
+		req.offset = loffset;
+		/*
+		 * If transfer is from local to remote then the self window
+		 * must be readable and vice versa.
+		 */
+		req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
+		req.nr_bytes = len;
+		req.type = SCIF_WINDOW_PARTIAL;
+		req.head = &ep->rma_info.reg_list;
+		/* Does a valid local window exist? */
+		err = scif_query_window(&req);
+		if (err) {
+			mutex_unlock(&ep->rma_info.rma_lock);
+			goto error;
+		}
+	}
+
+	/* Does a valid remote window exist? */
+	err = scif_query_window(&remote_req);
+	if (err) {
+		mutex_unlock(&ep->rma_info.rma_lock);
+		goto error;
+	}
+
+	/*
+	 * Prepare copy_work for submitting work to the DMA kernel thread
+	 * or CPU copy routine.
+	 */
+	copy_work.len = len;
+	copy_work.loopback = loopback;
+	copy_work.remote_dev = ep->remote_dev;
+	if (dir == SCIF_LOCAL_TO_REMOTE) {
+		copy_work.src_offset = loffset;
+		copy_work.src_window = local_window;
+		copy_work.dst_offset = roffset;
+		copy_work.dst_window = remote_window;
+	} else {
+		copy_work.src_offset = roffset;
+		copy_work.src_window = remote_window;
+		copy_work.dst_offset = loffset;
+		copy_work.dst_window = local_window;
+	}
+
+	if (flags & SCIF_RMA_USECPU) {
+		/* A failed CPU copy must reach the caller as well */
+		err = scif_rma_list_cpu_copy(&copy_work);
+	} else {
+		chan = ep->rma_info.dma_chan;
+		err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
+						     chan, loffset);
+	}
+	if (addr && !cache)
+		atomic_inc(&ep->rma_info.tw_refcount);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	if (last_chunk) {
+		struct scif_dev *rdev = ep->remote_dev;
+		int drain_err = 0;
+
+		if (copy_work.fence_type == SCIF_DMA_POLL)
+			drain_err = scif_drain_dma_poll(rdev->sdev,
+							ep->rma_info.dma_chan);
+		else if (copy_work.fence_type == SCIF_DMA_INTR)
+			drain_err = scif_drain_dma_intr(rdev->sdev,
+							ep->rma_info.dma_chan);
+		/* Still drain after a failed copy, but keep its error */
+		if (!err)
+			err = drain_err;
+	}
+
+	if (addr && !cache)
+		scif_queue_for_cleanup(local_window, &scif_info.rma);
+	scif_put_peer_dev(spdev);
+	return err;
+error:
+	if (addr && local_window && !cache)
+		scif_destroy_window(ep, local_window);
+	dev_err(scif_info.mdev.this_device, "%s %d err %d len 0x%lx\n",
+		__func__, __LINE__, err, len);
+	scif_put_peer_dev(spdev);
+	return err;
+}
+
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+
+	/* Misaligned: SCIF_MAX_UNALIGNED_BUF_SIZE pieces first, rest last */
+	if (scif_unaligned(loffset, roffset)) {
+		for (; len > SCIF_MAX_UNALIGNED_BUF_SIZE;
+		     len -= SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				return err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	return scif_rma_copy(epd, loffset, 0x0, len, roffset, flags,
+			     SCIF_REMOTE_TO_LOCAL, true);
+}
+EXPORT_SYMBOL_GPL(scif_readfrom);
+
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
+		 off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, loffset, len, roffset, flags);
+
+	/* Misaligned: SCIF_MAX_UNALIGNED_BUF_SIZE pieces first, rest last */
+	if (scif_unaligned(loffset, roffset)) {
+		for (; len > SCIF_MAX_UNALIGNED_BUF_SIZE;
+		     len -= SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, loffset, 0x0,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				return err;
+			loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	return scif_rma_copy(epd, loffset, 0x0, len, roffset, flags,
+			     SCIF_LOCAL_TO_REMOTE, true);
+}
+EXPORT_SYMBOL_GPL(scif_writeto);
+
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
+		   off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+
+	/* Misaligned: SCIF_MAX_UNALIGNED_BUF_SIZE pieces first, rest last */
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		for (; len > SCIF_MAX_UNALIGNED_BUF_SIZE;
+		     len -= SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_REMOTE_TO_LOCAL, false);
+			if (err)
+				return err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	return scif_rma_copy(epd, 0, (u64)addr, len, roffset, flags,
+			     SCIF_REMOTE_TO_LOCAL, true);
+}
+EXPORT_SYMBOL_GPL(scif_vreadfrom);
+
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
+		  off_t roffset, int flags)
+{
+	int err;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
+		epd, addr, len, roffset, flags);
+
+	/* Misaligned: SCIF_MAX_UNALIGNED_BUF_SIZE pieces first, rest last */
+	if (scif_unaligned((off_t __force)addr, roffset)) {
+		if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
+			flags &= ~SCIF_RMA_USECACHE;
+
+		for (; len > SCIF_MAX_UNALIGNED_BUF_SIZE;
+		     len -= SCIF_MAX_UNALIGNED_BUF_SIZE) {
+			err = scif_rma_copy(epd, 0, (u64)addr,
+					    SCIF_MAX_UNALIGNED_BUF_SIZE,
+					    roffset, flags,
+					    SCIF_LOCAL_TO_REMOTE, false);
+			if (err)
+				return err;
+			addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
+			roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
+		}
+	}
+	return scif_rma_copy(epd, 0, (u64)addr, len, roffset, flags,
+			     SCIF_LOCAL_TO_REMOTE, true);
+}
+EXPORT_SYMBOL_GPL(scif_vwriteto);
diff --git a/kernel/drivers/misc/mic/scif/scif_epd.c b/kernel/drivers/misc/mic/scif/scif_epd.c
new file mode 100644
index 000000000..00e5d6d66
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_epd.c
@@ -0,0 +1,357 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include "scif_map.h"
+
+void scif_cleanup_ep_qp(struct scif_endpt *ep)
+{
+	struct scif_dev *rdev = ep->remote_dev;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	/* Undo each mapping that is still set and clear its handle */
+	if (qp->outbound_q.rb_base) {
+		scif_iounmap((void *)qp->outbound_q.rb_base,
+			     qp->outbound_q.size, rdev);
+		qp->outbound_q.rb_base = NULL;
+	}
+	if (qp->remote_qp) {
+		scif_iounmap((void *)qp->remote_qp, sizeof(*qp), rdev);
+		qp->remote_qp = NULL;
+	}
+
+	if (qp->local_qp) {
+		scif_unmap_single(qp->local_qp, rdev, sizeof(*qp));
+		qp->local_qp = 0x0;
+	}
+	if (qp->local_buf) {
+		scif_unmap_single(qp->local_buf, rdev, SCIF_ENDPT_QP_SIZE);
+		qp->local_buf = 0;
+	}
+}
+
+void scif_teardown_ep(void *endpt)
+{
+	struct scif_endpt *ep = endpt;
+	struct scif_qp *qp = ep->qp_info.qp;
+
+	if (!qp)
+		return;
+	spin_lock(&ep->lock);
+	scif_cleanup_ep_qp(ep);
+	spin_unlock(&ep->lock);
+	kfree(qp->inbound_q.rb_base);
+	kfree(qp);
+}
+
+/* Queue @ep on the zombie list; callers must not touch it afterwards. */
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
+{
+	const bool take_eplock = !eplock_held;
+
+	if (take_eplock)
+		mutex_lock(&scif_info.eplock);
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_ZOMBIE;
+	spin_unlock(&ep->lock);
+	scif_info.nr_zombies++;
+	list_add_tail(&ep->list, &scif_info.zombie);
+	if (take_eplock)
+		mutex_unlock(&scif_info.eplock);
+
+	schedule_work(&scif_info.misc_work);
+}
+
+static struct scif_endpt *scif_find_listen_ep(u16 port)
+{
+	struct scif_endpt *ep, *found = NULL;
+
+	/* Walk the listen list under eplock; stop at the first port match */
+	mutex_lock(&scif_info.eplock);
+	list_for_each_entry(ep, &scif_info.listen, list) {
+		if (ep->port.port == port) {
+			found = ep;
+			break;
+		}
+	}
+	mutex_unlock(&scif_info.eplock);
+
+	return found;
+}
+
+void scif_cleanup_zombie_epd(void)
+{
+	struct scif_endpt *ep, *next;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each_entry_safe(ep, next, &scif_info.zombie, list) {
+		/* Stays queued until scif_rma_ep_can_uninit() allows it */
+		if (!scif_rma_ep_can_uninit(ep))
+			continue;
+
+		list_del(&ep->list);
+		scif_info.nr_zombies--;
+		put_iova_domain(&ep->rma_info.iovad);
+		kfree(ep);
+	}
+	mutex_unlock(&scif_info.eplock);
+}
+
+/**
+ * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message
+ *
+ * This message is initiated by the remote node to request a connection
+ * to the local node.  This function looks for an end point in the
+ * listen state on the requested port id.
+ *
+ * If one is found and its backlog is not exhausted, the request is queued
+ * on that end point and any pending accept calls are woken up.  Otherwise,
+ * or if no memory is available to queue the request, a connection reject
+ * message is sent back to the remote node.
+ */
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_conreq *conreq;
+	struct scif_endpt *ep;
+
+	/* Without memory there is nothing to queue, so just reject */
+	conreq = kmalloc(sizeof(*conreq), GFP_KERNEL);
+	if (!conreq)
+		goto conreq_sendrej;
+
+	/* Reject when nobody listens on the requested port */
+	ep = scif_find_listen_ep(msg->dst.port);
+	if (!ep)
+		goto conreq_sendrej_free;
+
+	spin_lock(&ep->lock);
+	if (ep->conreqcnt >= ep->backlog) {
+		/* Reject: too many requests are pending already */
+		spin_unlock(&ep->lock);
+		goto conreq_sendrej_free;
+	}
+
+	/* Queue a copy of the request and wake up pending accept calls */
+	conreq->msg = *msg;
+	list_add_tail(&conreq->list, &ep->conlist);
+	ep->conreqcnt++;
+	wake_up_interruptible(&ep->conwq);
+	spin_unlock(&ep->lock);
+	return;
+
+conreq_sendrej_free:
+	kfree(conreq);
+conreq_sendrej:
+	msg->uop = SCIF_CNCT_REJ;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message
+ *
+ * An accept() on the remote node has occurred and sent this message to
+ * indicate success.  If the end point is still connecting, save the remote
+ * node's information, move to MAPPING and wake up the connect request.
+ */
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTING) {
+		/* Record what the grant carries before changing state */
+		ep->remote_ep = msg->payload[2];
+		ep->qp_info.gnt_pld = msg->payload[1];
+		ep->peer.port = msg->src.port;
+		ep->peer.node = msg->src.node;
+		ep->state = SCIFEP_MAPPING;
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message, payload[0] carries the local end point
+ *
+ * The remote end finished mapping the local memory: mark the end point
+ * connected, add it to the connected list and wake the pending accept().
+ */
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	mutex_lock(&scif_info.connlock);
+	spin_lock(&ep->lock);
+	/* New ep is now connected with all resources set. */
+	ep->state = SCIFEP_CONNECTED;
+	list_add_tail(&ep->list, &scif_info.connected);
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+	mutex_unlock(&scif_info.connlock);
+}
+
+/**
+ * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message, payload[0] carries the local end point
+ *
+ * The remote connection request failed to map the local memory it was sent:
+ * move the end point to the CLOSING state and wake up the pending accept().
+ */
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_CLOSING;
+	wake_up(&ep->conwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message
+ * @msg:        Interrupt message
+ *
+ * The remote end has rejected the connection request.  An end point that
+ * is still connecting goes back to the bound state and connect() is woken.
+ */
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTING) {
+		ep->state = SCIFEP_BOUND;
+		wake_up(&ep->conwq);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_discnct() - Respond to SCIF_DISCNCT interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message
+ *
+ * The remote node has indicated close() has been called on its end
+ * point.  Move the matching local end point from the connected list to
+ * the disconnected list, mark it disconnected and wake up its senders
+ * and receivers.
+ *
+ * A DISCNT_ACK is always sent back, whether or not a matching end point
+ * was found, so that the remote side can complete its close routine.
+ */
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = NULL;
+	struct scif_endpt *cur, *next;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each_entry_safe(cur, next, &scif_info.connected, list) {
+		/*
+		 * The local ep may have sent a disconnect and been closed
+		 * due to a message response time out. It may have been
+		 * allocated again and formed a new connection so we want to
+		 * check if the remote ep matches
+		 */
+		if ((u64)cur != msg->payload[1] ||
+		    (u64)cur->remote_ep != msg->payload[0])
+			continue;
+
+		list_del(&cur->list);
+		ep = cur;
+		spin_lock(&ep->lock);
+		break;
+	}
+
+	/*
+	 * If the terminated end is not found then this side started closing
+	 * before the other side sent the disconnect.  If so the ep will no
+	 * longer be on the connected list.  Regardless the other side
+	 * needs to be acked to let it know close is complete.
+	 */
+	if (ep) {
+		/* ep->lock was taken when the match was found above */
+		ep->state = SCIFEP_DISCONNECTED;
+		list_add_tail(&ep->list, &scif_info.disconnected);
+
+		wake_up_interruptible(&ep->sendwq);
+		wake_up_interruptible(&ep->recvwq);
+		spin_unlock(&ep->lock);
+	}
+	mutex_unlock(&scif_info.connlock);
+
+	/*
+	 * The ack reuses the received message with only the opcode
+	 * changed and is sent to the node the message came from.
+	 */
+	msg->uop = SCIF_DISCNT_ACK;
+	scif_nodeqp_send(&scif_dev[msg->src.node], msg);
+}
+
+/**
+ * scif_discnt_ack() - Respond to SCIF_DISCNT_ACK interrupt message
+ * @msg:        Interrupt message, payload[0] carries the local end point
+ *
+ * Remote side has indicated it has no more references to local resources.
+ */
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	ep->state = SCIFEP_DISCONNECTED;
+	spin_unlock(&ep->lock);
+	complete(&ep->discon);
+}
+
+/**
+ * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message
+ * @msg:        Interrupt message, payload[0] carries the local end point
+ *
+ * Wakes up waiters on the receive queue while the end point is connected.
+ */
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		wake_up_interruptible(&ep->recvwq);
+	spin_unlock(&ep->lock);
+}
+
+/**
+ * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message
+ * @msg:        Interrupt message, payload[0] carries the local end point
+ *
+ * Wakes up waiters on the send queue while the end point is connected.
+ */
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		wake_up_interruptible(&ep->sendwq);
+	spin_unlock(&ep->lock);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_epd.h b/kernel/drivers/misc/mic/scif/scif_epd.h
new file mode 100644
index 000000000..1771d7a9b
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_epd.h
@@ -0,0 +1,210 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_EPD_H
+#define SCIF_EPD_H
+
+#include <linux/delay.h>
+#include <linux/scif.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_EPLOCK_HELD true
+
+enum scif_epd_state {
+	SCIFEP_UNBOUND,
+	SCIFEP_BOUND,		/* also re-entered when a connect is rejected */
+	SCIFEP_LISTENING,
+	SCIFEP_CONNECTED,	/* set once the grant has been acked */
+	SCIFEP_CONNECTING,	/* waiting for a grant or a reject */
+	SCIFEP_MAPPING,		/* entered when the grant arrives */
+	SCIFEP_CLOSING,		/* set when the grant is nacked */
+	SCIFEP_CLLISTEN,
+	SCIFEP_DISCONNECTED,	/* set on a disconnect or disconnect ack */
+	SCIFEP_ZOMBIE		/* queued on the zombie list for cleanup */
+};
+
+/*
+ * struct scif_conreq - Data structure added to the connection list.
+ *
+ * @msg: copy of the connection request message received
+ * @list: link in the listening endpoint's list of connection requests
+ */
+struct scif_conreq {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/* Size of the RB for the Endpoint QP */
+#define SCIF_ENDPT_QP_SIZE 0x1000
+
+/*
+ * struct scif_endpt_qp_info - SCIF endpoint queue pair
+ *
+ * @qp: Qpair for this endpoint
+ * @qp_offset: DMA address of the QP
+ * @gnt_pld: Payload in a SCIF_CNCT_GNT message containing the
+ *           physical address of the remote_qp.
+ */
+struct scif_endpt_qp_info {
+	struct scif_qp *qp;
+	dma_addr_t qp_offset;
+	dma_addr_t gnt_pld;
+};
+
+/*
+ * struct scif_endpt - The SCIF endpoint data structure
+ *
+ * @state: end point state
+ * @lock: lock synchronizing access to endpoint fields like state etc
+ * @port: self port information
+ * @peer: peer port information
+ * @backlog: maximum pending connection requests
+ * @qp_info: Endpoint QP information for SCIF messaging
+ * @remote_dev: scifdev used by this endpt to communicate with remote node.
+ * @remote_ep: remote endpoint
+ * @conreqcnt: Number of connection requests queued on @conlist; further
+ *             requests are rejected once it reaches @backlog.
+ * @files: Open file information used to match the id passed in with
+ *         the flush routine.
+ * @conlist: list of connection requests
+ * @conwq: waitqueue for connection processing
+ * @discon: completion used during disconnection
+ * @sendwq: waitqueue used during sending messages
+ * @recvwq: waitqueue used during message receipt
+ * @sendlock: Synchronize ordering of messages sent
+ * @recvlock: Synchronize ordering of messages received
+ * @list: link to list of various endpoints like connected, listening etc
+ * @li_accept: pending ACCEPTREG
+ * @acceptcnt: pending ACCEPTREG cnt
+ * @liacceptlist: link to listen accept
+ * @miacceptlist: link to uaccept
+ * @listenep: associated listen ep
+ * @conn_port: Connection port
+ * @conn_err: Errors during connection
+ * @conn_async_state: Async connection
+ * @conn_pend_wq: Used by poll while waiting for incoming connections
+ * @conn_list: List of async connection requests
+ * @rma_info: Information for triggering SCIF RMA and DMA operations
+ * @mmu_list: link to list of MMU notifier cleanup work
+ * @anon: anonymous file for use in kernel mode scif poll
+ */
+struct scif_endpt {
+	enum scif_epd_state state;
+	spinlock_t lock;
+	struct scif_port_id port;
+	struct scif_port_id peer;
+	int backlog;
+	struct scif_endpt_qp_info qp_info;
+	struct scif_dev *remote_dev;
+	u64 remote_ep;
+	int conreqcnt;
+	struct files_struct *files;
+	struct list_head conlist;
+	wait_queue_head_t conwq;
+	struct completion discon;
+	wait_queue_head_t sendwq;
+	wait_queue_head_t recvwq;
+	struct mutex sendlock;
+	struct mutex recvlock;
+	struct list_head list;
+	struct list_head li_accept;
+	int acceptcnt;
+	struct list_head liacceptlist;
+	struct list_head miacceptlist;
+	struct scif_endpt *listenep;
+	struct scif_port_id conn_port;
+	int conn_err;
+	int conn_async_state;
+	wait_queue_head_t conn_pend_wq;
+	struct list_head conn_list;
+	struct scif_endpt_rma_info rma_info;
+	struct list_head mmu_list;
+	struct file *anon;
+};
+
+/* Wrapper: _scifdev_alive() applied to the end point's remote device. */
+static inline int scifdev_alive(struct scif_endpt *ep)
+{
+	return _scifdev_alive(ep->remote_dev);
+}
+
+/*
+ * scif_verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Returns 0 for a connected end point whose scifdev_alive() check passes,
+ * -ENODEV when that check fails, -ECONNRESET for a disconnected end point
+ * and -ENOTCONN in any other state.
+ */
+static inline int scif_verify_epd(struct scif_endpt *ep)
+{
+	switch (ep->state) {
+	case SCIFEP_CONNECTED:
+		return scifdev_alive(ep) ? 0 : -ENODEV;
+	case SCIFEP_DISCONNECTED:
+		return -ECONNRESET;
+	default:
+		return -ENOTCONN;
+	}
+}
+
+/* Give @epd an anonymous "scif" file; 0 on success, else the error code. */
+static inline int scif_anon_inode_getfile(scif_epd_t epd)
+{
+	epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
+
+	return PTR_ERR_OR_ZERO(epd->anon);
+}
+
+static inline void scif_anon_inode_fput(scif_epd_t epd)
+{
+	if (!epd->anon)
+		return;
+	fput(epd->anon);
+	epd->anon = NULL;
+}
+
+void scif_cleanup_zombie_epd(void);
+void scif_teardown_ep(void *endpt);
+void scif_cleanup_ep_qp(struct scif_endpt *ep);
+void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held);
+void scif_get_node_info(void);
+void scif_send_acks(struct scif_dev *dev);
+void scif_conn_handler(struct work_struct *work);
+int scif_rsrv_port(u16 port);
+void scif_get_port(u16 port);
+int scif_get_new_port(void);
+void scif_put_port(u16 port);
+int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags);
+void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
+int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
+int __scif_flush(scif_epd_t epd);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+unsigned int __scif_pollfd(struct file *f, poll_table *wait,
+			   struct scif_endpt *ep);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages);
+#endif /* SCIF_EPD_H */
diff --git a/kernel/drivers/misc/mic/scif/scif_fd.c b/kernel/drivers/misc/mic/scif/scif_fd.c
new file mode 100644
index 000000000..f7e826142
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_fd.c
@@ -0,0 +1,471 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+static int scif_fdopen(struct inode *inode, struct file *f)
+{
+	/* Every open() gets its own endpoint, parked in f->private_data */
+	struct scif_endpt *ep = scif_open();
+
+	if (ep)
+		f->private_data = ep;
+	return ep ? 0 : -ENOMEM;
+}
+
+static int scif_fdclose(struct inode *inode, struct file *f)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_close(priv);
+}
+
+static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return scif_mmap(vma, priv);
+}
+
+static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
+{
+	struct scif_endpt *priv = f->private_data;
+
+	return __scif_pollfd(f, wait, priv);
+}
+
+static int scif_fdflush(struct file *f, fl_owner_t id)
+{
+	struct scif_endpt *ep = f->private_data;
+
+	spin_lock(&ep->lock);
+	/*
+	 * The listening endpoint stashes the open file information before
+	 * waiting for incoming connections. The release callback would never be
+	 * called if the application closed the endpoint, while waiting for
+	 * incoming connections from a separate thread since the file descriptor
+	 * reference count is bumped up in the accept IOCTL. Call the flush
+	 * routine if the id matches the endpoint open file information so that
+	 * the listening endpoint can be woken up and the fd released.
+	 */
+	if (ep->files == id)
+		__scif_flush(ep);
+	spin_unlock(&ep->lock);
+	return 0;
+}
+
+static __always_inline void scif_err_debug(int err, const char *str)
+{
+	/*
+	 * ENOTCONN is a common uninteresting error which is
+	 * flooding debug messages to the console unnecessarily.
+	 */
+	if (err < 0 && err != -ENOTCONN)
+		dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err);
+}
+
+static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	struct scif_endpt *priv = f->private_data;
+	void __user *argp = (void __user *)arg;
+	int err = 0;
+	struct scifioctl_msg request;
+	bool non_block = false;
+
+	non_block = !!(f->f_flags & O_NONBLOCK);
+
+	switch (cmd) {
+	case SCIF_BIND:
+	{
+		int pn;
+
+		if (copy_from_user(&pn, argp, sizeof(pn)))
+			return -EFAULT;
+
+		pn = scif_bind(priv, pn);
+		if (pn < 0)
+			return pn;
+
+		if (copy_to_user(argp, &pn, sizeof(pn)))
+			return -EFAULT;
+
+		return 0;
+	}
+	case SCIF_LISTEN:
+		return scif_listen(priv, arg);
+	case SCIF_CONNECT:
+	{
+		struct scifioctl_connect req;
+		struct scif_endpt *ep = (struct scif_endpt *)priv;
+
+		if (copy_from_user(&req, argp, sizeof(req)))
+			return -EFAULT;
+
+		err = __scif_connect(priv, &req.peer, non_block);
+		if (err < 0)
+			return err;
+
+		req.self.node = ep->port.node;
+		req.self.port = ep->port.port;
+
+		if (copy_to_user(argp, &req, sizeof(req)))
+			return -EFAULT;
+
+		return 0;
+	}
+	/*
+	 * Accept is done in two halves.  The request ioctl does the basic
+	 * functionality of accepting the request and returning the information
+	 * about it including the internal ID of the end point.  The register
+	 * is done with the internal ID on a new file descriptor opened by the
+	 * requesting process.
+	 */
+	case SCIF_ACCEPTREQ:
+	{
+		struct scifioctl_accept request;
+		scif_epd_t *ep = (scif_epd_t *)&request.endpt;
+
+		if (copy_from_user(&request, argp, sizeof(request)))
+			return -EFAULT;
+
+		err = scif_accept(priv, &request.peer, ep, request.flags);
+		if (err < 0)
+			return err;
+
+		if (copy_to_user(argp, &request, sizeof(request))) {
+			scif_close(*ep);
+			return -EFAULT;
+		}
+		/*
+		 * Add to the list of user mode eps where the second half
+		 * of the accept is not yet completed.
+		 */
+		mutex_lock(&scif_info.eplock);
+		list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
+		list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
+		(*ep)->listenep = priv;
+		priv->acceptcnt++;
+		mutex_unlock(&scif_info.eplock);
+
+		return 0;
+	}
+	case SCIF_ACCEPTREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scif_endpt *newep;
+		struct scif_endpt *lisep;
+		struct scif_endpt *fep = NULL;
+		struct scif_endpt *tmpep;
+		struct list_head *pos, *tmpq;
+
+		/* Finally replace the pointer to the accepted endpoint */
+		if (copy_from_user(&newep, argp, sizeof(void *)))
+			return -EFAULT;
+
+		/* Remove from the user accept queue */
+		mutex_lock(&scif_info.eplock);
+		list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, miacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				fep = tmpep;
+				break;
+			}
+		}
+
+		if (!fep) {
+			mutex_unlock(&scif_info.eplock);
+			return -ENOENT;
+		}
+
+		lisep = newep->listenep;
+		list_for_each_safe(pos, tmpq, &lisep->li_accept) {
+			tmpep = list_entry(pos,
+					   struct scif_endpt, liacceptlist);
+			if (tmpep == newep) {
+				list_del(pos);
+				lisep->acceptcnt--;
+				break;
+			}
+		}
+
+		mutex_unlock(&scif_info.eplock);
+
+		/* Free the resources automatically created from the open. */
+		scif_anon_inode_fput(priv);
+		scif_teardown_ep(priv);
+		scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
+		f->private_data = newep;
+		return 0;
+	}
+	case SCIF_SEND:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = scif_user_send(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto send_err;
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+				 &err, sizeof(err))) {
+			err = -EFAULT;
+			goto send_err;
+		}
+		err = 0;
+send_err:
+		scif_err_debug(err, "scif_send");
+		return err;
+	}
+	case SCIF_RECV:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		if (copy_from_user(&request, argp,
+				   sizeof(struct scifioctl_msg))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+
+		err = scif_user_recv(priv, (void __user *)request.msg,
+				     request.len, request.flags);
+		if (err < 0)
+			goto recv_err;
+
+		if (copy_to_user(&
+				 ((struct scifioctl_msg __user *)argp)->out_len,
+			&err, sizeof(err))) {
+			err = -EFAULT;
+			goto recv_err;
+		}
+		err = 0;
+recv_err:
+		scif_err_debug(err, "scif_recv");
+		return err;
+	}
+	case SCIF_GET_NODEIDS:
+	{
+		struct scifioctl_node_ids node_ids;
+		int entries;
+		u16 *nodes;
+		void __user *unodes, *uself;
+		u16 self;
+
+		if (copy_from_user(&node_ids, argp, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err2;
+		}
+
+		entries = min_t(int, scif_info.maxid, node_ids.len);
+		nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL);
+		if (entries && !nodes) {
+			err = -ENOMEM;
+			goto getnodes_err2;
+		}
+		node_ids.len = scif_get_node_ids(nodes, entries, &self);
+
+		unodes = (void __user *)node_ids.nodes;
+		if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		uself = (void __user *)node_ids.self;
+		if (copy_to_user(uself, &self, sizeof(u16))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+
+		if (copy_to_user(argp, &node_ids, sizeof(node_ids))) {
+			err = -EFAULT;
+			goto getnodes_err1;
+		}
+getnodes_err1:
+		kfree(nodes);
+getnodes_err2:
+		return err;
+	}
+	case SCIF_REG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_reg reg;
+		off_t ret;
+
+		if (copy_from_user(&reg, argp, sizeof(reg))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		if (reg.flags & SCIF_MAP_KERNEL) {
+			err = -EINVAL;
+			goto reg_err;
+		}
+		ret = scif_register(priv, (void *)reg.addr, reg.len,
+				    reg.offset, reg.prot, reg.flags);
+		if (ret < 0) {
+			err = (int)ret;
+			goto reg_err;
+		}
+
+		if (copy_to_user(&((struct scifioctl_reg __user *)argp)
+				 ->out_offset, &ret, sizeof(reg.out_offset))) {
+			err = -EFAULT;
+			goto reg_err;
+		}
+		err = 0;
+reg_err:
+		scif_err_debug(err, "scif_register");
+		return err;
+	}
+	case SCIF_UNREG:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_unreg unreg;
+
+		if (copy_from_user(&unreg, argp, sizeof(unreg))) {
+			err = -EFAULT;
+			goto unreg_err;
+		}
+		err = scif_unregister(priv, unreg.offset, unreg.len);
+unreg_err:
+		scif_err_debug(err, "scif_unregister");
+		return err;
+	}
+	case SCIF_READFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto readfrom_err;
+		}
+		err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
+				    copy.flags);
+readfrom_err:
+		scif_err_debug(err, "scif_readfrom");
+		return err;
+	}
+	case SCIF_WRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto writeto_err;
+		}
+		err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
+				   copy.flags);
+writeto_err:
+		scif_err_debug(err, "scif_writeto");
+		return err;
+	}
+	case SCIF_VREADFROM:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vreadfrom_err;
+		}
+		err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
+				     copy.roffset, copy.flags);
+vreadfrom_err:
+		scif_err_debug(err, "scif_vreadfrom");
+		return err;
+	}
+	case SCIF_VWRITETO:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_copy copy;
+
+		if (copy_from_user(&copy, argp, sizeof(copy))) {
+			err = -EFAULT;
+			goto vwriteto_err;
+		}
+		err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
+				    copy.roffset, copy.flags);
+vwriteto_err:
+		scif_err_debug(err, "scif_vwriteto");
+		return err;
+	}
+	case SCIF_FENCE_MARK:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_mark mark;
+		int tmp_mark = 0;
+
+		if (copy_from_user(&mark, argp, sizeof(mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+		err = scif_fence_mark(priv, mark.flags, &tmp_mark);
+		if (err)
+			goto fence_mark_err;
+		if (copy_to_user((void __user *)mark.mark, &tmp_mark,
+				 sizeof(tmp_mark))) {
+			err = -EFAULT;
+			goto fence_mark_err;
+		}
+fence_mark_err:
+		scif_err_debug(err, "scif_fence_mark");
+		return err;
+	}
+	case SCIF_FENCE_WAIT:
+	{
+		struct scif_endpt *priv = f->private_data;
+
+		err = scif_fence_wait(priv, arg);
+		scif_err_debug(err, "scif_fence_wait");
+		return err;
+	}
+	case SCIF_FENCE_SIGNAL:
+	{
+		struct scif_endpt *priv = f->private_data;
+		struct scifioctl_fence_signal signal;
+
+		if (copy_from_user(&signal, argp, sizeof(signal))) {
+			err = -EFAULT;
+			goto fence_signal_err;
+		}
+
+		err = scif_fence_signal(priv, signal.loff, signal.lval,
+					signal.roff, signal.rval, signal.flags);
+fence_signal_err:
+		scif_err_debug(err, "scif_fence_signal");
+		return err;
+	}
+	}
+	return -EINVAL;
+}
+
+const struct file_operations scif_fops = {
+	.open = scif_fdopen,
+	.release = scif_fdclose,
+	.unlocked_ioctl = scif_fdioctl,
+	.mmap = scif_fdmmap,
+	.poll = scif_fdpoll,
+	.flush = scif_fdflush,
+	.owner = THIS_MODULE,
+};
diff --git a/kernel/drivers/misc/mic/scif/scif_fence.c b/kernel/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000..7f2c96f57
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,771 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+
+#include "scif_main.h"
+
+/**
+ * scif_recv_mark: Handle SCIF_MARK request
+ * @msg:	Interrupt message, rewritten in place into the (N)ACK reply
+ *
+ * The peer has requested a mark.
+ */
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	int mark = 0, err;	/* may stay unset on early failure */
+
+	err = _scif_fence_mark(ep, &mark);
+	if (err)
+		msg->uop = SCIF_MARK_NACK;
+	else
+		msg->uop = SCIF_MARK_ACK;
+	msg->payload[0] = ep->remote_ep;
+	msg->payload[2] = mark;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_MARK message.
+ */
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *req = (struct scif_fence_info *)msg->payload[1];
+
+	/* payload[1] is the request pointer; payload[2] holds the DMA mark */
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (msg->uop != SCIF_MARK_ACK) {
+		req->state = OP_FAILED;
+	} else {
+		req->state = OP_COMPLETED;
+		req->dma_mark = (int)msg->payload[2];
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&req->comp);
+}
+
+/**
+ * scif_recv_wait: Handle SCIF_WAIT request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested waiting on a fence.
+ */
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_remote_fence_info *fence;
+
+	/*
+	 * Allocate structure for remote fence information and
+	 * send a NACK if the allocation failed. The peer will
+	 * return ENOMEM upon receiving a NACK.
+	 */
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence) {
+		msg->payload[0] = ep->remote_ep;
+		msg->uop = SCIF_WAIT_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		return;
+	}
+
+	/* Prepare the fence request */
+	memcpy(&fence->msg, msg, sizeof(struct scifmsg));
+	INIT_LIST_HEAD(&fence->list);
+
+	/* Insert to the global remote fence request list */
+	mutex_lock(&scif_info.fencelock);
+	atomic_inc(&ep->rma_info.fence_refcount);
+	list_add_tail(&fence->list, &scif_info.fence);
+	mutex_unlock(&scif_info.fencelock);
+
+	schedule_work(&scif_info.misc_work);
+}
+
+/**
+ * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a SCIF_WAIT message. payload[1] carries back
+ * the fence request pointer that _scif_send_fence() put in the request.
+ */
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *req =
+		(struct scif_fence_info *)msg->payload[1];
+	bool acked = msg->uop == SCIF_WAIT_ACK;
+
+	/* Publish the outcome under rma_lock, then wake the waiter */
+	mutex_lock(&ep->rma_info.rma_lock);
+	req->state = acked ? OP_COMPLETED : OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&req->comp);
+}
+
+/**
+ * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a local offset.
+ */
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	off_t off = msg->payload[1];
+	u64 val = msg->payload[2];
+
+	/* Reuse the request as the reply: set uop, retarget payload[0] */
+	if (scif_prog_signal(ep, off, val, SCIF_WINDOW_SELF))
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_remote: Handle SCIF_SIG_REMOTE request
+ * @msg:	Interrupt message
+ *
+ * The peer has requested a signal on a remote offset.
+ */
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	off_t off = msg->payload[1];
+	u64 val = msg->payload[2];
+
+	/* Reuse the request as the reply: set uop, retarget payload[0] */
+	if (scif_prog_signal(ep, off, val, SCIF_WINDOW_PEER))
+		msg->uop = SCIF_SIG_NACK;
+	else
+		msg->uop = SCIF_SIG_ACK;
+	msg->payload[0] = ep->remote_ep;
+	scif_nodeqp_send(ep->remote_dev, msg);
+}
+
+/**
+ * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
+ * @msg:	Interrupt message
+ *
+ * The peer has responded to a signal request. payload[3] carries back the
+ * fence request pointer that scif_send_fence_signal() put in the request.
+ */
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_fence_info *req =
+		(struct scif_fence_info *)msg->payload[3];
+	bool acked = msg->uop == SCIF_SIG_ACK;
+
+	/* Publish the outcome under rma_lock, then wake the waiter */
+	mutex_lock(&ep->rma_info.rma_lock);
+	req->state = acked ? OP_COMPLETED : OP_FAILED;
+	mutex_unlock(&ep->rma_info.rma_lock);
+	complete(&req->comp);
+}
+
+static inline void *scif_get_local_va(off_t off, struct scif_window *window)
+{
+	/* Page index is relative to the window start; the rest is in-page */
+	int idx = (off - window->offset) >> PAGE_SHIFT;
+	void *base = page_address(window->pinned_pages->pages[idx]);
+
+	return base + (off & ~PAGE_MASK);
+}
+
+static void scif_prog_signal_cb(void *arg)
+{
+	struct scif_status *status = arg;
+
+	dma_pool_free(status->ep->remote_dev->signal_pool, status,
+		      status->src_dma_addr);
+}
+
+static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
+	struct dma_async_tx_descriptor *tx;
+	struct scif_status *status = NULL;
+	dma_addr_t src;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto alloc_fail;
+	}
+	dma_async_issue_pending(chan);
+	if (x100) {
+		/*
+		 * For X100 use the status descriptor to write the value to
+		 * the destination.
+		 */
+		tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
+	} else {
+		status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
+					&src);
+		if (!status) {
+			err = -ENOMEM;
+			dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+				__func__, __LINE__, err);
+			goto alloc_fail;
+		}
+		status->val = val;
+		status->src_dma_addr = src;
+		status->ep = ep;
+		src += offsetof(struct scif_status, val);
+		tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
+						  DMA_PREP_INTERRUPT);
+	}
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	if (!x100) {
+		tx->callback = scif_prog_signal_cb;
+		tx->callback_param = status;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = -EIO;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		goto dma_fail;
+	}
+	dma_async_issue_pending(chan);
+	return 0;
+dma_fail:
+	if (!x100)
+		dma_pool_free(ep->remote_dev->signal_pool, status,
+			      status->src_dma_addr);
+alloc_fail:
+	return err;
+}
+
+/*
+ * scif_prog_signal:
+ * @epd - Endpoint Descriptor
+ * @offset - registered address to write @val to
+ * @val - Value to be written at @offset
+ * @type - Type of the window.
+ *
+ * Arrange to write a value to the registered offset after ensuring that the
+ * offset provided is indeed valid.
+ */
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window = NULL;
+	struct scif_rma_req req;
+	dma_addr_t dst_dma_addr;
+	int err;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = sizeof(u64);
+	req.prot = SCIF_PROT_WRITE;
+	req.type = SCIF_WINDOW_SINGLE;
+	if (type == SCIF_WINDOW_SELF)
+		req.head = &ep->rma_info.reg_list;
+	else
+		req.head = &ep->rma_info.remote_reg_list;
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto unlock_ret;
+	}
+
+	if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
+		u64 *dst_virt;
+
+		if (type == SCIF_WINDOW_SELF)
+			dst_virt = scif_get_local_va(offset, window);
+		else
+			dst_virt =
+			scif_get_local_va(offset, (struct scif_window *)
+					  window->peer_window);
+		*dst_virt = val;
+	} else {
+		dst_dma_addr = __scif_off_to_dma_addr(window, offset);
+		err = _scif_prog_signal(epd, dst_dma_addr, val);
+	}
+unlock_ret:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	return err;
+}
+
+static int _scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
+	int rc;
+
+	/* Wait for DMA callback in scif_fence_mark_cb(..) */
+	rc = wait_event_interruptible_timeout(ep->rma_info.markwq,
+					      dma_async_is_tx_complete(
+					      ep->rma_info.dma_chan,
+					      cookie, NULL, NULL) ==
+					      DMA_COMPLETE,
+					      SCIF_NODE_ALIVE_TIMEOUT);
+
+	/* > 0: completed in time; 0: timed out; < 0: passed through as is */
+	if (rc > 0)
+		return 0;
+	return rc ? rc : -ETIMEDOUT;
+}
+
+/**
+ * scif_rma_handle_remote_fences:
+ *
+ * This routine services remote fence requests.
+ */
+void scif_rma_handle_remote_fences(void)
+{
+	struct list_head *item, *tmp;
+	struct scif_remote_fence_info *fence;
+	struct scif_endpt *ep;
+	int mark, err;
+
+	might_sleep();
+	mutex_lock(&scif_info.fencelock);
+	list_for_each_safe(item, tmp, &scif_info.fence) {
+		fence = list_entry(item, struct scif_remote_fence_info,
+				   list);
+		/* Remove fence from global list */
+		list_del(&fence->list);
+
+		/* Initiate the fence operation */
+		ep = (struct scif_endpt *)fence->msg.payload[0];
+		mark = fence->msg.payload[2];
+		err = _scif_fence_wait(ep, mark);
+		if (err)
+			fence->msg.uop = SCIF_WAIT_NACK;
+		else
+			fence->msg.uop = SCIF_WAIT_ACK;
+		fence->msg.payload[0] = ep->remote_ep;
+		scif_nodeqp_send(ep->remote_dev, &fence->msg);
+		kfree(fence);
+		if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
+			schedule_work(&scif_info.misc_work);
+	}
+	mutex_unlock(&scif_info.fencelock);
+}
+
+static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
+{
+	int err;
+	/* Zeroed so unset fields never carry stale stack data to the peer */
+	/* NOTE(review): assumes scif_nodeqp_send() copies the whole struct */
+	struct scifmsg msg = {};
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+
+	msg.src = ep->port;
+	msg.uop = uop;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = (u64)fence_req;
+	if (uop == SCIF_WAIT)
+		msg.payload[2] = mark;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED)
+		err = scif_nodeqp_send(ep->remote_dev, &msg);
+	else
+		err = -ENOTCONN;
+	spin_unlock(&ep->lock);
+	if (err)
+		goto error_free;
+retry:
+	/* Wait for the (N)ACK reply to the SCIF_MARK/SCIF_WAIT request */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (err < 0 && fence_req->state == OP_IN_PROGRESS)
+		fence_req->state = OP_FAILED;
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENOMEM;
+	if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
+		*out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
+	mutex_unlock(&ep->rma_info.rma_lock);
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+/**
+ * scif_send_fence_mark:
+ * @epd: end point descriptor.
+ * @out_mark: Output DMA mark reported by peer.
+ *
+ * Send a remote fence mark request.
+ */
+static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
+{
+	return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
+}
+
+/**
+ * scif_send_fence_wait:
+ * @epd: end point descriptor.
+ * @mark: DMA mark to wait for.
+ *
+ * Send a remote fence wait request.
+ */
+static int scif_send_fence_wait(scif_epd_t epd, int mark)
+{
+	return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
+}
+
+static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
+					struct scif_fence_info *fence_req)
+{
+	int err;
+
+retry:
+	/* Wait for a SCIF_SIG_(N)ACK message */
+	err = wait_for_completion_timeout(&fence_req->comp,
+					  SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+	if (!err)
+		err = -ENODEV;
+	if (err > 0)
+		err = 0;
+	if (err < 0) {
+		mutex_lock(&ep->rma_info.rma_lock);
+		if (fence_req->state == OP_IN_PROGRESS)
+			fence_req->state = OP_FAILED;
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+	if (fence_req->state == OP_FAILED && !err)
+		err = -ENXIO;
+	return err;
+}
+
+/**
+ * scif_send_fence_signal:
+ * @epd - endpoint descriptor
+ * @loff - local offset
+ * @lval - local value to write to loffset
+ * @roff - remote offset
+ * @rval - remote value to write to roffset
+ * @flags - flags
+ *
+ * Sends a remote fence signal request
+ */
+static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
+				  off_t loff, u64 lval, int flags)
+{
+	int err = 0;
+	struct scifmsg msg;
+	struct scif_fence_info *fence_req;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+
+	fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
+	if (!fence_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	fence_req->state = OP_IN_PROGRESS;
+	init_completion(&fence_req->comp);
+	msg.src = ep->port;
+	if (flags & SCIF_SIGNAL_LOCAL) {
+		msg.uop = SCIF_SIG_LOCAL;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = roff;
+		msg.payload[2] = rval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+		if (err)
+			goto error_free;
+	}
+	fence_req->state = OP_IN_PROGRESS;
+
+	if (flags & SCIF_SIGNAL_REMOTE) {
+		msg.uop = SCIF_SIG_REMOTE;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = loff;
+		msg.payload[2] = lval;
+		msg.payload[3] = (u64)fence_req;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		if (err)
+			goto error_free;
+		err = _scif_send_fence_signal_wait(ep, fence_req);
+	}
+error_free:
+	kfree(fence_req);
+error:
+	return err;
+}
+
+static void scif_fence_mark_cb(void *arg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)arg;
+
+	wake_up_interruptible(&ep->rma_info.markwq);
+	atomic_dec(&ep->rma_info.fence_refcount);
+}
+
+/*
+ * _scif_fence_mark:
+ *
+ * @epd - endpoint descriptor
+ * Set up a mark for this endpoint and return the value of the mark.
+ */
+int _scif_fence_mark(scif_epd_t epd, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct dma_chan *chan = ep->rma_info.dma_chan;
+	struct dma_device *ddev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	int err;
+
+	tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	dma_async_issue_pending(chan);
+	tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		err = -ENOMEM;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	tx->callback = scif_fence_mark_cb;
+	tx->callback_param = ep;
+	*mark = cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		err = (int)cookie;
+		dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
+			__func__, __LINE__, err);
+		return err;
+	}
+	atomic_inc(&ep->rma_info.fence_refcount);
+	dma_async_issue_pending(chan);
+	return 0;
+}
+
+#define SCIF_LOOPB_MAGIC_MARK 0xdead
+
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	/* *mark is output-only: never read it before it has been written */
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x\n", ep, flags);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * Return a valid mark to be symmetric.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		*mark = SCIF_LOOPB_MAGIC_MARK;
+		return 0;
+	}
+
+	if (flags & SCIF_FENCE_INIT_SELF)
+		err = _scif_fence_mark(epd, mark);
+	else
+		err = scif_send_fence_mark(ep, mark);
+
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
+		ep, flags, err ? 0 : *mark, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_mark);
+
+int scif_fence_wait(scif_epd_t epd, int mark)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_wait: ep %p mark 0x%x\n",
+		ep, mark);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * Management node loopback does not need to use DMA.
+	 * The only valid mark is SCIF_LOOPB_MAGIC_MARK, which is what
+	 * scif_fence_mark() hands out for loopback; anything else is invalid.
+	 */
+	if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
+		if (mark == SCIF_LOOPB_MAGIC_MARK)
+			return 0;
+		else
+			return -EINVAL;
+	}
+	if (mark & SCIF_REMOTE_FENCE)
+		err = scif_send_fence_wait(epd, mark);
+	else
+		err = _scif_fence_wait(epd, mark);
+	if (err < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_wait);
+
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
+		      off_t roff, u64 rval, int flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	int err = 0;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
+		ep, loff, lval, roff, rval, flags);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Invalid flags? */
+	if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
+			SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
+		return -EINVAL;
+
+	/* At least one of init self or peer RMA should be set */
+	if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
+		return -EINVAL;
+
+	/* Exactly one of init self or peer RMA should be set but not both */
+	if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
+		return -EINVAL;
+
+	/* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
+	if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
+		return -EINVAL;
+
+	/* Only Dword aligned offsets allowed */
+	if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	/* Only Dword aligned offsets allowed */
+	if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
+		return -EINVAL;
+
+	if (flags & SCIF_FENCE_INIT_PEER) {
+		err = scif_send_fence_signal(epd, roff, rval, loff,
+					     lval, flags);
+	} else {
+		/* Local Signal in Local RAS */
+		if (flags & SCIF_SIGNAL_LOCAL) {
+			err = scif_prog_signal(epd, loff, lval,
+					       SCIF_WINDOW_SELF);
+			if (err)
+				goto error_ret;
+		}
+
+		/* Signal in Remote RAS */
+		if (flags & SCIF_SIGNAL_REMOTE)
+			err = scif_prog_signal(epd, roff,
+					       rval, SCIF_WINDOW_PEER);
+	}
+error_ret:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_fence_signal);
diff --git a/kernel/drivers/misc/mic/scif/scif_main.c b/kernel/drivers/misc/mic/scif/scif_main.c
new file mode 100644
index 000000000..36d847af1
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_main.c
@@ -0,0 +1,359 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/module.h>
+#include <linux/idr.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_map.h"
+
+struct scif_info scif_info = {
+	.mdev = {
+		.minor = MISC_DYNAMIC_MINOR,
+		.name = "scif",
+		.fops = &scif_fops,
+	}
+};
+
+struct scif_dev *scif_dev;
+struct kmem_cache *unaligned_cache;
+static atomic_t g_loopb_cnt;
+
+/* Runs in the context of intr_wq */
+static void scif_intr_bh_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev =
+			container_of(work, struct scif_dev, intr_bh);
+
+	if (scifdev_self(scifdev))
+		scif_loopb_msg_handler(scifdev, scifdev->qpairs);
+	else
+		scif_nodeqp_intrhandler(scifdev, scifdev->qpairs);
+}
+
+int scif_setup_intr_wq(struct scif_dev *scifdev)
+{
+	if (!scifdev->intr_wq) {
+		snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname),
+			 "SCIF INTR %d", scifdev->node);
+		scifdev->intr_wq =
+			alloc_ordered_workqueue(scifdev->intr_wqname, 0);
+		if (!scifdev->intr_wq)
+			return -ENOMEM;
+		INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler);
+	}
+	return 0;
+}
+
+void scif_destroy_intr_wq(struct scif_dev *scifdev)
+{
+	if (scifdev->intr_wq) {
+		destroy_workqueue(scifdev->intr_wq);
+		scifdev->intr_wq = NULL;
+	}
+}
+
+irqreturn_t scif_intr_handler(int irq, void *data)
+{
+	struct scif_dev *scifdev = data;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	sdev->hw_ops->ack_interrupt(sdev, scifdev->db);
+	queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+	return IRQ_HANDLED;
+}
+
+static void scif_qp_setup_handler(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						qp_dwork.work);
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	dma_addr_t da = 0;
+	int err;
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		da = bp->scif_card_dma_addr;
+		scifdev->rdb = bp->h2c_scif_db;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		da = readq(&bp->scif_host_dma_addr);
+		scifdev->rdb = ioread8(&bp->c2h_scif_db);
+	}
+	if (da) {
+		err = scif_qp_response(da, scifdev);
+		if (err)
+			dev_err(&scifdev->sdev->dev,
+				"scif_qp_response err %d\n", err);
+	} else {
+		schedule_delayed_work(&scifdev->qp_dwork,
+				      msecs_to_jiffies(1000));
+	}
+}
+
+static int scif_setup_scifdev(void)
+{
+	/* We support a maximum of 129 SCIF nodes including the mgmt node */
+#define MAX_SCIF_NODES 129
+	int i;
+	u8 num_nodes = MAX_SCIF_NODES;
+
+	scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
+	if (!scif_dev)
+		return -ENOMEM;
+	for (i = 0; i < num_nodes; i++) {
+		struct scif_dev *scifdev = &scif_dev[i];
+
+		scifdev->node = i;
+		scifdev->exit = OP_IDLE;
+		init_waitqueue_head(&scifdev->disconn_wq);
+		mutex_init(&scifdev->lock);
+		INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
+		INIT_DELAYED_WORK(&scifdev->p2p_dwork,
+				  scif_poll_qp_state);
+		INIT_DELAYED_WORK(&scifdev->qp_dwork,
+				  scif_qp_setup_handler);
+		INIT_LIST_HEAD(&scifdev->p2p);
+		RCU_INIT_POINTER(scifdev->spdev, NULL);
+	}
+	return 0;
+}
+
+static void scif_destroy_scifdev(void)
+{
+	kfree(scif_dev);
+}
+
+static int scif_probe(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+	int rc;
+
+	dev_set_drvdata(&sdev->dev, sdev);
+	scifdev->sdev = sdev;
+
+	if (1 == atomic_add_return(1, &g_loopb_cnt)) {
+		struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
+
+		loopb_dev->sdev = sdev;
+		rc = scif_setup_loopback_qp(loopb_dev);
+		if (rc)
+			goto exit;
+	}
+
+	rc = scif_setup_intr_wq(scifdev);
+	if (rc)
+		goto destroy_loopb;
+	rc = scif_setup_qp(scifdev);
+	if (rc)
+		goto destroy_intr;
+	scifdev->db = sdev->hw_ops->next_db(sdev);
+	scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						    "SCIF_INTR", scifdev,
+						    scifdev->db);
+	if (IS_ERR(scifdev->cookie)) {
+		rc = PTR_ERR(scifdev->cookie);
+		goto free_qp;
+	}
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = scifdev->db;
+		bp->scif_host_dma_addr = scifdev->qp_dma_addr;
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(scifdev->db, &bp->h2c_scif_db);
+		writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr);
+	}
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+	return rc;
+free_qp:
+	scif_free_qp(scifdev);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+destroy_loopb:
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+exit:
+	return rc;
+}
+
+void scif_stop(struct scif_dev *scifdev)
+{
+	struct scif_dev *dev;
+	int i;
+
+	for (i = scif_info.maxid; i >= 0; i--) {
+		dev = &scif_dev[i];
+		if (scifdev_self(dev))
+			continue;
+		scif_handle_remove_node(i);
+	}
+}
+
+/* Bus remove callback: tears down what scif_probe() set up for @sdev. */
+static void scif_remove(struct scif_hw_dev *sdev)
+{
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	if (scif_is_mgmt_node()) {
+		struct mic_bootparam *bp = sdev->dp;
+
+		bp->c2h_scif_db = -1;
+		bp->scif_host_dma_addr = 0x0;
+		scif_disconnect_node(scifdev->node, true);
+	} else {
+		struct mic_bootparam __iomem *bp = sdev->rdp;
+
+		iowrite8(-1, &bp->h2c_scif_db);
+		writeq(0x0, &bp->scif_card_dma_addr);
+		scif_info.card_initiated_exit = true;
+		scif_stop(scifdev);
+	}
+	if (atomic_dec_and_test(&g_loopb_cnt))
+		scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
+	if (scifdev->cookie) {
+		sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev);
+		scifdev->cookie = NULL;
+	}
+	scif_destroy_intr_wq(scifdev);
+	/* qp_dwork re-arms itself and reads scifdev->sdev (cleared below),
+	 * so also wait for a running instance before freeing the QP. */
+	cancel_delayed_work_sync(&scifdev->qp_dwork);
+	scif_free_qp(scifdev);
+	scifdev->rdb = -1;
+	scifdev->sdev = NULL;
+}
+
+static struct scif_hw_dev_id id_table[] = {
+	{ MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct scif_driver scif_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = scif_probe,
+	.remove = scif_remove,
+};
+
+static int _scif_init(void)
+{
+	int rc;
+
+	mutex_init(&scif_info.eplock);
+	spin_lock_init(&scif_info.rmalock);
+	spin_lock_init(&scif_info.nb_connect_lock);
+	spin_lock_init(&scif_info.port_lock);
+	mutex_init(&scif_info.conflock);
+	mutex_init(&scif_info.connlock);
+	mutex_init(&scif_info.fencelock);
+	INIT_LIST_HEAD(&scif_info.uaccept);
+	INIT_LIST_HEAD(&scif_info.listen);
+	INIT_LIST_HEAD(&scif_info.zombie);
+	INIT_LIST_HEAD(&scif_info.connected);
+	INIT_LIST_HEAD(&scif_info.disconnected);
+	INIT_LIST_HEAD(&scif_info.rma);
+	INIT_LIST_HEAD(&scif_info.rma_tc);
+	INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
+	INIT_LIST_HEAD(&scif_info.fence);
+	INIT_LIST_HEAD(&scif_info.nb_connect_list);
+	init_waitqueue_head(&scif_info.exitwq);
+	scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
+	scif_info.en_msg_log = 0;
+	scif_info.p2p_enable = 1;
+	rc = scif_setup_scifdev();
+	if (rc)
+		goto error;
+	unaligned_cache = kmem_cache_create("Unaligned_DMA",
+					    SCIF_KMEM_UNALIGNED_BUF_SIZE,
+					    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!unaligned_cache) {
+		rc = -ENOMEM;
+		goto free_sdev;
+	}
+	INIT_WORK(&scif_info.misc_work, scif_misc_handler);
+	INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
+	INIT_WORK(&scif_info.conn_work, scif_conn_handler);
+	idr_init(&scif_ports);
+	return 0;
+free_sdev:
+	scif_destroy_scifdev();
+error:
+	return rc;
+}
+
+static void _scif_exit(void)
+{
+	idr_destroy(&scif_ports);
+	kmem_cache_destroy(unaligned_cache);
+	scif_destroy_scifdev();
+}
+
+static int __init scif_init(void)
+{
+	int rc = _scif_init();
+
+	if (rc)	/* _scif_init() unwinds itself on failure; nothing to undo */
+		return rc;
+	iova_cache_get();
+	rc = scif_peer_bus_init();
+	if (rc)
+		goto exit;
+	rc = scif_register_driver(&scif_driver);
+	if (rc)
+		goto peer_bus_exit;
+	rc = misc_register(&scif_info.mdev);
+	if (!rc) {
+		scif_init_debugfs();
+		return 0;
+	}
+	scif_unregister_driver(&scif_driver);
+peer_bus_exit:
+	scif_peer_bus_exit();
+exit:
+	iova_cache_put();	/* drop the reference taken by iova_cache_get() */
+	_scif_exit();
+	return rc;
+}
+
+static void __exit scif_exit(void)
+{
+	scif_exit_debugfs();
+	misc_deregister(&scif_info.mdev);
+	scif_unregister_driver(&scif_driver);
+	scif_peer_bus_exit();
+	iova_cache_put();
+	_scif_exit();
+}
+
+module_init(scif_init);
+module_exit(scif_exit);
+
+MODULE_DEVICE_TABLE(scif, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) SCIF driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/mic/scif/scif_main.h b/kernel/drivers/misc/mic/scif/scif_main.h
new file mode 100644
index 000000000..a08f0b600
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_main.h
@@ -0,0 +1,283 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAIN_H
+#define SCIF_MAIN_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/dmaengine.h>
+#include <linux/iova.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+#include <linux/scif.h>
+#include "../common/mic_dev.h"
+
+#define SCIF_MGMT_NODE 0
+#define SCIF_DEFAULT_WATCHDOG_TO 30
+#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
+#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
+#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
+
+/*
+ * Generic state used for certain node QP message exchanges
+ * like Unregister, Alloc etc.
+ */
+enum scif_msg_state {
+	OP_IDLE = 1,
+	OP_IN_PROGRESS,
+	OP_COMPLETED,
+	OP_FAILED
+};
+
+/*
+ * struct scif_info - Global SCIF information
+ *
+ * @nodeid: Node ID this node is to others
+ * @maxid: Max known node ID
+ * @total: Total number of SCIF nodes
+ * @nr_zombies: number of zombie endpoints
+ * @eplock: Lock to synchronize listening, zombie endpoint lists
+ * @connlock: Lock to synchronize connected and disconnected lists
+ * @nb_connect_lock: Synchronize non blocking connect operations
+ * @port_lock: Synchronize access to SCIF ports
+ * @uaccept: List of user acceptreq waiting for acceptreg
+ * @listen: List of listening end points
+ * @zombie: List of zombie end points with pending RMA's
+ * @connected: List of end points in connected state
+ * @disconnected: List of end points in disconnected state
+ * @nb_connect_list: List for non blocking connections
+ * @misc_work: miscellaneous SCIF tasks
+ * @conflock: Lock to synchronize SCIF node configuration changes
+ * @en_msg_log: Enable debug message logging
+ * @p2p_enable: Enable P2P SCIF network
+ * @mdev: The MISC device
+ * @conn_work: Work for workqueue handling all connections
+ * @exitwq: Wait queue for waiting for an EXIT node QP message response
+ * @loopb_dev: Dummy SCIF device used for loopback
+ * @loopb_wq: Workqueue used for handling loopback messages
+ * @loopb_wqname[16]: Name of loopback workqueue
+ * @loopb_work: Used for submitting work to loopb_wq
+ * @loopb_recv_q: List of messages received on the loopb_wq
+ * @card_initiated_exit: set when the card has initiated the exit
+ * @rmalock: Synchronize access to RMA operations
+ * @fencelock: Synchronize access to list of remote fences requested.
+ * @rma: List of temporary registered windows to be destroyed.
+ * @rma_tc: List of temporary registered & cached Windows to be destroyed
+ * @fence: List of remote fence requests
+ * @mmu_notif_work: Work for registration caching MMU notifier workqueue
+ * @mmu_notif_cleanup: List of temporary cached windows for reg cache
+ * @rma_tc_limit: RMA temporary cache limit
+ */
+struct scif_info {
+	u8 nodeid;
+	u8 maxid;
+	u8 total;
+	u32 nr_zombies;
+	struct mutex eplock;
+	struct mutex connlock;
+	spinlock_t nb_connect_lock;
+	spinlock_t port_lock;
+	struct list_head uaccept;
+	struct list_head listen;
+	struct list_head zombie;
+	struct list_head connected;
+	struct list_head disconnected;
+	struct list_head nb_connect_list;
+	struct work_struct misc_work;
+	struct mutex conflock;
+	u8 en_msg_log;
+	u8 p2p_enable;
+	struct miscdevice mdev;
+	struct work_struct conn_work;
+	wait_queue_head_t exitwq;
+	struct scif_dev *loopb_dev;
+	struct workqueue_struct *loopb_wq;
+	char loopb_wqname[16];
+	struct work_struct loopb_work;
+	struct list_head loopb_recv_q;
+	bool card_initiated_exit;
+	spinlock_t rmalock;
+	struct mutex fencelock;
+	struct list_head rma;
+	struct list_head rma_tc;
+	struct list_head fence;
+	struct work_struct mmu_notif_work;
+	struct list_head mmu_notif_cleanup;
+	unsigned long rma_tc_limit;
+};
+
+/*
+ * struct scif_p2p_info - SCIF mapping information used for P2P
+ *
+ * @ppi_peer_id: SCIF peer node id
+ * @ppi_sg: Scatter list for bar information (One for mmio and one for aper)
+ * @sg_nentries: Number of entries in the scatterlist
+ * @ppi_da: DMA address for MMIO and APER bars
+ * @ppi_len: Length of MMIO and APER bars
+ * @ppi_list: Link in list of mapping information
+ */
+struct scif_p2p_info {
+	u8 ppi_peer_id;
+	struct scatterlist *ppi_sg[2];
+	u64 sg_nentries[2];
+	dma_addr_t ppi_da[2];
+	u64 ppi_len[2];
+#define SCIF_PPI_MMIO 0
+#define SCIF_PPI_APER 1
+	struct list_head ppi_list;
+};
+
+/*
+ * struct scif_dev - SCIF remote device specific fields
+ *
+ * @node: Node id
+ * @p2p: List of P2P mapping information
+ * @qpairs: The node queue pair for exchanging control messages
+ * @intr_wq: Workqueue for handling Node QP messages
+ * @intr_wqname: Name of node QP workqueue for handling interrupts
+ * @intr_bh: Used for submitting work to intr_wq
+ * @lock: Lock used for synchronizing access to the scif device
+ * @sdev: SCIF hardware device on the SCIF hardware bus
+ * @db: doorbell the peer will trigger to generate an interrupt on self
+ * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
+ * @cookie: Cookie received while registering the interrupt handler
+ * @peer_add_work: Work for handling device_add for peer devices
+ * @p2p_dwork: Delayed work to enable polling for P2P state
+ * @qp_dwork: Delayed work for enabling polling for remote QP information
+ * @p2p_retry: Number of times to retry polling of P2P state
+ * @base_addr: P2P aperture bar base address
+ * @mmio: The peer MMIO information used for P2P
+ * @spdev: SCIF peer device on the SCIF peer bus
+ * @node_remove_ack_pending: True if a node_remove_ack is pending
+ * @exit_ack_pending: true if an exit_ack is pending
+ * @disconn_wq: Used while waiting for a node remove response
+ * @disconn_rescnt: Keeps track of number of node remove requests sent
+ * @exit: Status of exit message
+ * @qp_dma_addr: Queue pair DMA address passed to the peer
+ * @dma_ch_idx: Round robin index for DMA channels
+ * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
+*/
+struct scif_dev {
+	u8 node;
+	struct list_head p2p;
+	struct scif_qp *qpairs;
+	struct workqueue_struct *intr_wq;
+	char intr_wqname[16];
+	struct work_struct intr_bh;
+	struct mutex lock;
+	struct scif_hw_dev *sdev;
+	int db;
+	int rdb;
+	struct mic_irq *cookie;
+	struct work_struct peer_add_work;
+	struct delayed_work p2p_dwork;
+	struct delayed_work qp_dwork;
+	int p2p_retry;
+	dma_addr_t base_addr;
+	struct mic_mw mmio;
+	struct scif_peer_dev __rcu *spdev;
+	bool node_remove_ack_pending;
+	bool exit_ack_pending;
+	wait_queue_head_t disconn_wq;
+	atomic_t disconn_rescnt;
+	enum scif_msg_state exit;
+	dma_addr_t qp_dma_addr;
+	int dma_ch_idx;
+	struct dma_pool *signal_pool;
+};
+
+extern bool scif_reg_cache_enable;
+extern bool scif_ulimit_check;
+extern struct scif_info scif_info;
+extern struct idr scif_ports;
+extern struct bus_type scif_peer_bus;
+extern struct scif_dev *scif_dev;
+extern const struct file_operations scif_fops;
+extern const struct file_operations scif_anon_fops;
+
+/* Size of the RB for the Node QP */
+#define SCIF_NODE_QP_SIZE 0x10000
+
+#include "scif_nodeqp.h"
+#include "scif_rma.h"
+#include "scif_rma_list.h"
+
+/*
+ * scifdev_self:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
+ */
+static inline int scifdev_self(struct scif_dev *dev)
+{
+	return dev->node == scif_info.nodeid;
+}
+
+static inline bool scif_is_mgmt_node(void)
+{
+	return !scif_info.nodeid;
+}
+
+/*
+ * scifdev_is_p2p:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
+ */
+static inline bool scifdev_is_p2p(struct scif_dev *dev)
+{
+	if (scif_is_mgmt_node())
+		return false;
+	else
+		return dev != &scif_dev[SCIF_MGMT_NODE] &&
+			!scifdev_self(dev);
+}
+
+/*
+ * _scifdev_alive:
+ * @scifdev: The remote SCIF Device
+ *
+ * Returns true if the remote SCIF Device is running or sleeping for
+ * this endpoint.
+ */
+static inline int _scifdev_alive(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	rcu_read_lock();
+	spdev = rcu_dereference(scifdev->spdev);
+	rcu_read_unlock();
+	return !!spdev;
+}
+
+#include "scif_epd.h"
+
+void __init scif_init_debugfs(void);
+void scif_exit_debugfs(void);
+int scif_setup_intr_wq(struct scif_dev *scifdev);
+void scif_destroy_intr_wq(struct scif_dev *scifdev);
+void scif_cleanup_scifdev(struct scif_dev *dev);
+void scif_handle_remove_node(int node);
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated);
+void scif_free_qp(struct scif_dev *dev);
+void scif_misc_handler(struct work_struct *work);
+void scif_stop(struct scif_dev *scifdev);
+irqreturn_t scif_intr_handler(int irq, void *data);
+#endif /* SCIF_MAIN_H */
diff --git a/kernel/drivers/misc/mic/scif/scif_map.h b/kernel/drivers/misc/mic/scif/scif_map.h
new file mode 100644
index 000000000..3e86360ba
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_map.h
@@ -0,0 +1,136 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_MAP_H
+#define SCIF_MAP_H
+
+#include "../bus/scif_bus.h"
+
+static __always_inline void *
+scif_alloc_coherent(dma_addr_t *dma_handle,
+		    struct scif_dev *scifdev, size_t size,
+		    gfp_t gfp)
+{
+	void *va;
+
+	if (scifdev_self(scifdev)) {
+		va = kmalloc(size, gfp);
+		if (va)
+			*dma_handle = virt_to_phys(va);
+	} else {
+		va = dma_alloc_coherent(&scifdev->sdev->dev,
+					size, dma_handle, gfp);
+		if (va && scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	return va;
+}
+
+static __always_inline void
+scif_free_coherent(void *va, dma_addr_t local,
+		   struct scif_dev *scifdev, size_t size)
+{
+	if (scifdev_self(scifdev)) {
+		kfree(va);
+	} else {
+		if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr)
+			local = local - scifdev->base_addr;
+		dma_free_coherent(&scifdev->sdev->dev,
+				  size, va, local);
+	}
+}
+
+static __always_inline int
+scif_map_single(dma_addr_t *dma_handle,
+		void *local, struct scif_dev *scifdev, size_t size)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = virt_to_phys((local));
+	} else {
+		*dma_handle = dma_map_single(&scifdev->sdev->dev,
+					     local, size, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+
+static __always_inline void
+scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
+		  size_t size)
+{
+	if (!scifdev_self(scifdev)) {
+		if (scifdev_is_p2p(scifdev))
+			local = local - scifdev->base_addr;
+		dma_unmap_single(&scifdev->sdev->dev, local,
+				 size, DMA_BIDIRECTIONAL);
+	}
+}
+
+static __always_inline void *
+scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev)
+{
+	void *out_virt;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (scifdev_self(scifdev))
+		out_virt = phys_to_virt(phys);
+	else
+		out_virt = (void __force *)
+			   sdev->hw_ops->ioremap(sdev, phys, size);
+	return out_virt;
+}
+
+static __always_inline void
+scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
+{
+	if (!scifdev_self(scifdev)) {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+
+		sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
+	}
+}
+
+static __always_inline int
+scif_map_page(dma_addr_t *dma_handle, struct page *page,
+	      struct scif_dev *scifdev)
+{
+	int err = 0;
+
+	if (scifdev_self(scifdev)) {
+		*dma_handle = page_to_phys(page);
+	} else {
+		struct scif_hw_dev *sdev = scifdev->sdev;
+		*dma_handle = dma_map_page(&sdev->dev,
+					   page, 0x0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&sdev->dev, *dma_handle))
+			err = -ENOMEM;
+		else if (scifdev_is_p2p(scifdev))
+			*dma_handle = *dma_handle + scifdev->base_addr;
+	}
+	if (err)
+		*dma_handle = 0;
+	return err;
+}
+#endif  /* SCIF_MAP_H */
diff --git a/kernel/drivers/misc/mic/scif/scif_mmap.c b/kernel/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000..49cb8f7b4
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+
+/*
+ * struct scif_vma_info - Information about a remote memory mapping
+ *			  created via scif_mmap(..)
+ * @vma: VM area struct
+ * @list: link to list of active vmas
+ */
+struct scif_vma_info {
+	struct vm_area_struct *vma;
+	struct list_head list;
+};
+
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = recv_window->prot;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.reg_list;
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+
+	scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		ep->rma_info.async_list_del = 1;
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (window && !window->ref_count)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/*
+ * Remove valid remote memory mappings created via scif_mmap(..) from the
+ * process address space since the remote node is lost
+ */
+static void __scif_zap_mmaps(struct scif_endpt *ep)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+	struct vm_area_struct *vma;
+	unsigned long size;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		vma = info->vma;
+		size = vma->vm_end - vma->vm_start;
+		zap_vma_ptes(vma, vma->vm_start, size);
+		dev_dbg(scif_info.mdev.this_device,
+			"%s ep %p zap vma %p size 0x%lx\n",
+			__func__, ep, info->vma, size);
+	}
+	spin_unlock(&ep->lock);
+}
+
+/*
+ * Traverse the list of endpoints for a particular remote node and
+ * zap valid remote memory mappings since the remote node is lost
+ */
+static void _scif_zap_mmaps(int node, struct list_head *head)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.connlock);
+	list_for_each(item, head) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev->node == node)
+			__scif_zap_mmaps(ep);
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+/*
+ * Wrapper for removing remote memory mappings for a particular node. This API
+ * is called by peer nodes as part of handling a lost node.
+ */
+void scif_zap_mmaps(int node)
+{
+	_scif_zap_mmaps(node, &scif_info.connected);
+	_scif_zap_mmaps(node, &scif_info.disconnected);
+}
+
+/*
+ * This API is only called while handling a lost node:
+ * a) Remote node is dead.
+ * b) Remote memory mappings have been zapped
+ * So we can traverse the remote_reg_list without any locks. Since
+ * the window has not yet been unregistered we can drop the ref count
+ * and queue it to the cleanup thread.
+ */
+static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
+{
+	struct list_head *pos, *tmp;
+	struct scif_window *window;
+
+	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
+		window = list_entry(pos, struct scif_window, list);
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(scif_info.mdev.this_device,
+				"%s %d unexpected\n",
+				__func__, __LINE__);
+		if (!window->ref_count) {
+			atomic_inc(&ep->rma_info.tw_refcount);
+			list_del_init(&window->list);
+			scif_queue_for_cleanup(window, &scif_info.rma);
+		}
+	}
+}
+
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node)
+{
+	struct scif_endpt *ep;
+	struct list_head *item;
+
+	mutex_lock(&scif_info.eplock);
+	list_for_each(item, &scif_info.zombie) {
+		ep = list_entry(item, struct scif_endpt, list);
+		if (ep->remote_dev && ep->remote_dev->node == node)
+			__scif_cleanup_rma_for_zombies(ep);
+	}
+	mutex_unlock(&scif_info.eplock);
+	flush_work(&scif_info.misc_work);
+}
+
+/* Insert the VMA into the per endpoint VMA list */
+static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct scif_vma_info *info;
+	int err = 0;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		err = -ENOMEM;
+		goto done;
+	}
+	info->vma = vma;
+	spin_lock(&ep->lock);
+	list_add_tail(&info->list, &ep->rma_info.vma_list);
+	spin_unlock(&ep->lock);
+done:
+	return err;
+}
+
+/* Delete the VMA from the per endpoint VMA list */
+static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
+{
+	struct list_head *item;
+	struct scif_vma_info *info;
+
+	spin_lock(&ep->lock);
+	list_for_each(item, &ep->rma_info.vma_list) {
+		info = list_entry(item, struct scif_vma_info, list);
+		if (info->vma == vma) {
+			list_del(&info->list);
+			kfree(info);
+			break;
+		}
+	}
+	spin_unlock(&ep->lock);
+}
+
+static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
+{
+	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	phys_addr_t out_phys, apt_base = 0;
+
+	/*
+	 * If the DMA address is card relative then we need to add the
+	 * aperture base for mmap to work correctly
+	 */
+	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
+		apt_base = sdev->aper->pa;
+	out_phys = apt_base + phys;
+	return out_phys;
+}
+
+int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+		   struct scif_range **pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int nr_pages, err, i;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	if (!len || (offset < 0) ||
+	    (offset + len < offset) ||
+	    (ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	*pages = NULL;	/* the error path below inspects *pages */
+	nr_pages = len >> PAGE_SHIFT;
+	req.out_window = &window;
+	req.offset = offset;
+	req.prot = 0;
+	req.nr_bytes = len;
+	req.type = SCIF_WINDOW_SINGLE;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error;
+	}
+
+	/* Allocate scif_range */
+	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
+	if (!*pages) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Allocate phys addr array */
+	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
+	if (!((*pages)->phys_addr)) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
+		/* Allocate virtual address array */
+		((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)));
+		if (!(*pages)->va) {
+			err = -ENOMEM;
+			goto error;
+		}
+	}
+	/* Populate the values */
+	(*pages)->cookie = window;
+	(*pages)->nr_pages = nr_pages;
+	(*pages)->prot_flags = window->prot;
+
+	for (i = 0; i < nr_pages; i++) {
+		(*pages)->phys_addr[i] =
+			__scif_off_to_dma_addr(window, offset +
+					       (i * PAGE_SIZE));
+		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
+							ep);
+		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
+			(*pages)->va[i] =
+				ep->remote_dev->sdev->aper->va +
+				(*pages)->phys_addr[i] -
+				ep->remote_dev->sdev->aper->pa;
+	}
+
+	scif_get_window(window, nr_pages);
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		if (*pages) {
+			scif_free((*pages)->phys_addr,
+				  nr_pages * sizeof(dma_addr_t));
+			scif_free((*pages)->va,
+				  nr_pages * sizeof(void *));
+			kfree(*pages);
+			*pages = NULL;
+		}
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_get_pages);
+
+int scif_put_pages(struct scif_range *pages)
+{
+	struct scif_endpt *ep;
+	struct scif_window *window;
+	struct scifmsg msg;
+
+	if (!pages || !pages->cookie)
+		return -EINVAL;
+
+	window = pages->cookie;
+
+	if (!window || window->magic != SCIFEP_MAGIC)
+		return -EINVAL;
+
+	ep = (struct scif_endpt *)window->ep;
+	/*
+	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
+	 * caller should be allowed to release references to the pages,
+	 * else the endpoint was not connected in the first place,
+	 * hence the ENOTCONN.
+	 */
+	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
+		return -ENOTCONN;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	scif_put_window(window, pages->nr_pages);
+
+	/* Initiate window destruction if ref count is zero */
+	if (!window->ref_count) {
+		list_del(&window->list);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+		/* Inform the peer about this window being destroyed. */
+		msg.uop = SCIF_MUNMAP;
+		msg.src = ep->port;
+		msg.payload[0] = window->peer_window;
+		/* No error handling for notification messages */
+		scif_nodeqp_send(ep->remote_dev, &msg);
+		/* Destroy this window from the peer's registered AS */
+		scif_destroy_remote_window(window);
+	} else {
+		mutex_unlock(&ep->rma_info.rma_lock);
+	}
+
+	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
+	scif_free(pages->va, pages->nr_pages * sizeof(void *));
+	kfree(pages);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(scif_put_pages);
+
+/*
+ * scif_rma_list_mmap:
+ *
+ * Traverse the remote registration list starting from start_window:
+ * 1) Create VtoP mappings one page at a time via remap_pfn_range(..)
+ * 2) Once step 1) completes successfully for all nr_pages, traverse the
+ *    range of windows again and bump the reference count.
+ * RMA lock must be held. Returns 0 or the remap_pfn_range(..) error.
+ */
+static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
+			      int nr_pages, struct vm_area_struct *vma)
+{
+	s64 end_offset, loop_offset = offset;
+	struct scif_window *window = start_window;
+	int loop_nr_pages, nr_pages_left = nr_pages;
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	int i, err = 0;
+	dma_addr_t phys_addr;
+	struct scif_window_iter src_win_iter;
+	size_t contig_bytes = 0;
+
+	might_sleep();
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_init_window_iter(window, &src_win_iter);
+		for (i = 0; i < loop_nr_pages; i++) {
+			phys_addr = scif_off_to_dma_addr(window, loop_offset,
+							 &contig_bytes,
+							 &src_win_iter);
+			phys_addr = scif_get_phys(phys_addr, ep);
+			err = remap_pfn_range(vma,
+					      vma->vm_start +
+					      loop_offset - offset,
+					      phys_addr >> PAGE_SHIFT,
+					      PAGE_SIZE,
+					      vma->vm_page_prot);
+			if (err)
+				goto error;
+			loop_offset += PAGE_SIZE;
+		}
+		nr_pages_left -= loop_nr_pages;
+		if (!nr_pages_left)
+			break;
+	}
+	/*
+	 * No more failures expected. Bump up the ref count for all
+	 * the windows. Another traversal from start_window required
+	 * for handling errors encountered across windows during
+	 * remap_pfn_range(..).
+	 */
+	loop_offset = offset;
+	nr_pages_left = nr_pages;
+	window = start_window;
+	head = &ep->rma_info.remote_reg_list;
+	list_for_each_entry_from(window, head, list) {
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min_t(int,
+				      (end_offset - loop_offset) >> PAGE_SHIFT,
+				      nr_pages_left);
+		scif_get_window(window, loop_nr_pages);
+		nr_pages_left -= loop_nr_pages;
+		loop_offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages_left)
+			break;
+	}
+error:
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+
+/*
+ * scif_rma_list_munmap:
+ *
+ * @start_window: first window of the range being unmapped
+ * @offset: offset at which the unmapped range starts
+ * @nr_pages: number of pages in the unmapped range
+ *
+ * Walk the remote registration list from start_window and, for each window
+ * covering part of the range:
+ * 1) Drop the references held for the pages that fall in that window.
+ * 2) If the ref count drops to zero, send a SCIF_MUNMAP message to the peer
+ *    and destroy the window.
+ * RMA lock must be held.
+ */
+static void scif_rma_list_munmap(struct scif_window *start_window,
+				 s64 offset, int nr_pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
+	struct list_head *head = &ep->rma_info.remote_reg_list;
+	struct scif_window *cur = start_window, *next;
+	s64 cur_offset = offset, win_end;
+	int chunk, remaining = nr_pages;
+	struct scifmsg msg;
+
+	msg.uop = SCIF_MUNMAP;
+	msg.src = ep->port;
+	list_for_each_entry_safe_from(cur, next, head, list) {
+		win_end = cur->offset + (cur->nr_pages << PAGE_SHIFT);
+		chunk = min_t(int, (win_end - cur_offset) >> PAGE_SHIFT,
+			      remaining);
+		scif_put_window(cur, chunk);
+		if (!cur->ref_count) {
+			/* Drain the endpoint's DMA channel first */
+			scif_drain_dma_intr(ep->remote_dev->sdev,
+					    ep->rma_info.dma_chan);
+			/* Notification only: send errors are not handled */
+			msg.payload[0] = cur->peer_window;
+			scif_nodeqp_send(ep->remote_dev, &msg);
+			list_del(&cur->list);
+			/* Destroy this window from the peer's registered AS */
+			scif_destroy_remote_window(cur);
+		}
+		remaining -= chunk;
+		cur_offset += (chunk << PAGE_SHIFT);
+		if (!remaining)
+			break;
+	}
+}
+
+/*
+ * The private data field of each VMA used to mmap a remote window
+ * points to an instance of struct vma_pvt
+ */
+struct vma_pvt {
+	struct scif_endpt *ep;	/* End point for remote window */
+	s64 offset;		/* offset within remote window */
+	bool valid_offset;	/* offset is valid only if the original
+				 * mmap request was for a single page
+				 * else the offset within the vma is
+				 * the correct offset
+				 */
+	struct kref ref;	/* get: scif_vma_open(), put: scif_munmap() */
+};
+
+/* kref release callback: frees the struct vma_pvt that embeds @ref */
+static void vma_pvt_release(struct kref *ref)
+{
+	/* @ref lives inside the allocation, so recover the owner and free it */
+	kfree(container_of(ref, struct vma_pvt, ref));
+}
+
+/**
+ * scif_vma_open - VMA open driver callback
+ * @vma: VMM memory area.
+ * The open method is called by the kernel to allow the subsystem implementing
+ * the VMA to initialize the area. This method is invoked any time a new
+ * reference to the VMA is made (when a process forks, for example).
+ * The one exception happens when the VMA is first created by mmap;
+ * in this case, the driver's mmap method is called instead.
+ * This function is also invoked when an existing VMA is split by the kernel
+ * due to a call to munmap on a subset of the VMA resulting in two VMAs.
+ * The kernel invokes this function only on one of the two VMAs.
+ */
+static void scif_vma_open(struct vm_area_struct *vma)
+{
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	scif_insert_vma(vmapvt->ep, vma);	/* return value is ignored */
+	kref_get(&vmapvt->ref);	/* paired with kref_put() in scif_munmap() */
+}
+
+/**
+ * scif_munmap - VMA close driver callback.
+ * @vma: VMM memory area.
+ * When an area is destroyed, the kernel calls its close operation.
+ * Note that there's no usage count associated with VMA's; the area
+ * is opened and closed exactly once by each process that uses it.
+ */
+static void scif_munmap(struct vm_area_struct *vma)
+{
+	struct scif_endpt *ep;
+	struct vma_pvt *vmapvt = vma->vm_private_data;
+	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	s64 offset;
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	int err;
+
+	might_sleep();
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
+		vma->vm_start, vma->vm_end);
+	ep = vmapvt->ep;
+	offset = vmapvt->valid_offset ? vmapvt->offset :
+		(vma->vm_pgoff) << PAGE_SHIFT;
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
+		ep, nr_pages, offset);
+	req.out_window = &window;
+	req.offset = offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	err = scif_query_window(&req);
+	if (err)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err %d\n", __func__, __LINE__, err);
+	else
+		scif_rma_list_munmap(window, offset, nr_pages);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/*
+	 * The kernel probably zeroes these out but we still want
+	 * to clean up our own mess just in case.
+	 */
+	vma->vm_ops = NULL;
+	vma->vm_private_data = NULL;
+	kref_put(&vmapvt->ref, vma_pvt_release);	/* may free vmapvt */
+	scif_delete_vma(ep, vma);	/* uses cached ep, not vmapvt */
+}
+
+static const struct vm_operations_struct scif_vm_ops = {
+	.open = scif_vma_open,	/* new reference to an existing VMA */
+	.close = scif_munmap,	/* VMA is being destroyed */
+};
+
+/**
+ * scif_mmap - Map pages in virtual address space to a remote window.
+ * @vma: VMM memory area.
+ * @epd: endpoint descriptor
+ *
+ * Return: Upon successful completion, scif_mmap() returns zero
+ * else an apt error is returned as documented in scif.h
+ */
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
+	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int err;
+	struct vma_pvt *vmapvt;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
+		ep, start_offset, nr_pages);
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	might_sleep();
+
+	err = scif_insert_vma(ep, vma);
+	if (err)
+		return err;
+
+	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
+	if (!vmapvt) {
+		scif_delete_vma(ep, vma);
+		return -ENOMEM;
+	}
+
+	vmapvt->ep = ep;
+	kref_init(&vmapvt->ref);
+
+	req.out_window = &window;
+	req.offset = start_offset;
+	req.nr_bytes = vma->vm_end - vma->vm_start;
+	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
+	req.type = SCIF_WINDOW_PARTIAL;
+	req.head = &ep->rma_info.remote_reg_list;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	err = scif_query_window(&req);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+
+	/* Keep the default prot for loopback, else map write-combined */
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+	/*
+	 * VM_DONTCOPY - Do not copy this vma on fork
+	 * VM_DONTEXPAND - Cannot expand with mremap()
+	 * VM_DONTDUMP - Do not include in the core dump
+	 * VM_PFNMAP - Page-ranges managed without "struct page"
+	 * VM_IO - Memory mapped I/O or similar
+	 *
+	 * We do not want to copy this VMA automatically on a fork(),
+	 * expand this VMA due to mremap() or swap out these pages since
+	 * the VMA is actually backed by physical pages in the remote
+	 * node's physical memory and not via a struct page.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
+
+	if (!scifdev_self(ep->remote_dev))
+		vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	/* Map this range of windows */
+	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unlock;
+	}
+	/* Set up the driver call back */
+	vma->vm_ops = &scif_vm_ops;
+	vma->vm_private_data = vmapvt;
+error_unlock:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (err) {
+		kfree(vmapvt);
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		scif_delete_vma(ep, vma);
+	}
+	return err;
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_nm.c b/kernel/drivers/misc/mic/scif/scif_nm.c
new file mode 100644
index 000000000..79f26a02a
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_nm.c
@@ -0,0 +1,237 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_peer_bus.h"
+
+#include "scif_main.h"
+#include "scif_map.h"
+
+/**
+ * scif_invalidate_ep() - Disconnect all connected endpoints of a node
+ * @node: node id; its endpoints are disconnected and their waiters woken
+ */
+static void scif_invalidate_ep(int node)
+{
+	struct scif_endpt *ep, *next;
+
+	flush_work(&scif_info.conn_work);
+	mutex_lock(&scif_info.connlock);
+	/* Endpoints already disconnected only need their resources dropped */
+	list_for_each_entry_safe(ep, next, &scif_info.disconnected, list) {
+		if (ep->remote_dev->node != node)
+			continue;
+		scif_unmap_all_windows(ep);
+		spin_lock(&ep->lock);
+		scif_cleanup_ep_qp(ep);
+		spin_unlock(&ep->lock);
+	}
+
+	/* Connected endpoints are first moved to the disconnected list */
+	list_for_each_entry_safe(ep, next, &scif_info.connected, list) {
+		if (ep->remote_dev->node != node)
+			continue;
+		list_del(&ep->list);
+		spin_lock(&ep->lock);
+		ep->state = SCIFEP_DISCONNECTED;
+		list_add_tail(&ep->list, &scif_info.disconnected);
+		scif_cleanup_ep_qp(ep);
+		wake_up_interruptible(&ep->sendwq);
+		wake_up_interruptible(&ep->recvwq);
+		spin_unlock(&ep->lock);
+		scif_unmap_all_windows(ep);
+	}
+	mutex_unlock(&scif_info.connlock);
+}
+
+void scif_free_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+
+	if (qp) {
+		scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
+		kfree(qp->inbound_q.rb_base);
+		scif_unmap_single(qp->local_qp, scifdev, sizeof(*qp));
+		kfree(qp);
+		scifdev->qpairs = NULL;	/* makes a repeated call a no-op */
+	}
+}
+
+static void scif_cleanup_qp(struct scif_dev *dev)
+{
+	struct scif_qp *qp = dev->qpairs; /* test the pointer, not &qpairs[0] */
+
+	if (!qp)
+		return;
+	scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev);
+	scif_iounmap((void *)qp->outbound_q.rb_base,
+		     sizeof(struct scif_qp), dev);
+	qp->remote_qp = NULL;
+	qp->local_write = 0;
+	qp->inbound_q.current_write_offset = 0;
+	qp->inbound_q.current_read_offset = 0;
+	if (scifdev_is_p2p(dev))
+		scif_free_qp(dev);	/* p2p devices also release the QP */
+}
+
+void scif_send_acks(struct scif_dev *dev)
+{
+	struct scifmsg msg;
+
+	if (dev->node_remove_ack_pending) {	/* owed to the mgmt node */
+		msg.uop = SCIF_NODE_REMOVE_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = SCIF_MGMT_NODE;
+		msg.payload[0] = dev->node;
+		scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg);
+		dev->node_remove_ack_pending = false;
+	}
+	if (dev->exit_ack_pending) {	/* owed to @dev itself */
+		msg.uop = SCIF_EXIT_ACK;
+		msg.src.node = scif_info.nodeid;
+		msg.dst.node = dev->node;
+		scif_nodeqp_send(dev, &msg);
+		dev->exit_ack_pending = false;
+	}
+}
+
+/*
+ * scif_cleanup_scifdev
+ * @dev: Remote SCIF device.
+ * Uninitialize SCIF data structures for remote SCIF device. Does nothing
+ * if @dev has no hardware device attached (dev->sdev is NULL).
+ */
+void scif_cleanup_scifdev(struct scif_dev *dev)
+{
+	struct scif_hw_dev *sdev = dev->sdev;
+
+	if (!dev->sdev)
+		return;
+	if (scifdev_is_p2p(dev)) {
+		if (dev->cookie) {
+			sdev->hw_ops->free_irq(sdev, dev->cookie, dev);
+			dev->cookie = NULL;
+		}
+		scif_destroy_intr_wq(dev);
+	}
+	flush_work(&scif_info.misc_work);
+	scif_destroy_p2p(dev);
+	scif_invalidate_ep(dev->node);
+	scif_zap_mmaps(dev->node);
+	scif_cleanup_rma_for_zombies(dev->node);
+	flush_work(&scif_info.misc_work);
+	scif_send_acks(dev);
+	if (!dev->node && scif_info.card_initiated_exit) {
+		/*
+		 * Send an SCIF_EXIT message which is the last message from MIC
+		 * to the Host and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(dev);
+		scif_info.card_initiated_exit = false;
+	}
+	scif_cleanup_qp(dev);
+}
+
+/*
+ * scif_handle_remove_node:
+ * @node: Node to remove; pending acks are sent from here only when
+ *        scif_peer_unregister_device() returns non-zero
+ */
+void scif_handle_remove_node(int node)
+{
+	struct scif_dev *scifdev = &scif_dev[node];
+
+	if (scif_peer_unregister_device(scifdev))
+		scif_send_acks(scifdev);
+}
+
+/* Tell @node to remove @remove_node; returns the scif_nodeqp_send() result */
+static int scif_send_rmnode_msg(int node, int remove_node)
+{
+	struct scifmsg msg;
+
+	msg.uop = SCIF_NODE_REMOVE;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = node;
+	msg.payload[0] = remove_node;
+	return scif_nodeqp_send(&scif_dev[node], &msg);
+}
+
+/**
+ * scif_disconnect_node:
+ *
+ * @node_id: id of the node to disconnect; node 0 is ignored
+ * @mgmt_initiated: Disconnection initiated from the mgmt node
+ *
+ * Disconnect a node from the scif network.
+ */
+void scif_disconnect_node(u32 node_id, bool mgmt_initiated)
+{
+	int ret;
+	int msg_cnt = 0;
+	u32 i = 0;
+	struct scif_dev *scifdev = &scif_dev[node_id];
+
+	if (!node_id)
+		return;
+
+	atomic_set(&scifdev->disconn_rescnt, 0);
+
+	/* Destroy p2p network */
+	for (i = 1; i <= scif_info.maxid; i++) {
+		if (i == node_id)
+			continue;
+		ret = scif_send_rmnode_msg(i, node_id);
+		if (!ret)
+			msg_cnt++;
+	}
+	/* Wait (bounded) for the SCIF_NODE_REMOVE_ACKs; a timeout is ignored */
+	ret = wait_event_timeout(scifdev->disconn_wq,
+				 (atomic_read(&scifdev->disconn_rescnt)
+				 == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT);
+	/* Tell the card to clean up */
+	if (mgmt_initiated && _scifdev_alive(scifdev))
+		/*
+		 * Send an SCIF_EXIT message which is the last message from Host
+		 * to the MIC and wait for a SCIF_EXIT_ACK
+		 */
+		scif_send_exit(scifdev);
+	atomic_set(&scifdev->disconn_rescnt, 0);
+	/* Tell the mgmt node to clean up */
+	ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id);
+	if (!ret)
+		/* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */
+		wait_event_timeout(scifdev->disconn_wq,
+				   (atomic_read(&scifdev->disconn_rescnt) == 1),
+				   SCIF_NODE_ALIVE_TIMEOUT);
+}
+
+/* Ask the mgmt node for node info and wait for its response */
+void scif_get_node_info(void)
+{
+	DECLARE_COMPLETION_ONSTACK(node_info);
+	struct scifmsg msg;
+
+	msg.uop = SCIF_GET_NODE_INFO;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = SCIF_MGMT_NODE;
+	/* The completion's address travels in the message payload */
+	msg.payload[3] = (u64)&node_info;
+
+	/* Wait for the SCIF_GET_NODE_INFO response unless the send failed */
+	if (!scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg))
+		wait_for_completion(&node_info);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_nodeqp.c b/kernel/drivers/misc/mic/scif/scif_nodeqp.c
new file mode 100644
index 000000000..c66ca1a58
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_nodeqp.c
@@ -0,0 +1,1354 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+#include "scif_main.h"
+#include "scif_nodeqp.h"
+#include "scif_map.h"
+
+/*
+ ************************************************************************
+ * SCIF node Queue Pair (QP) setup flow:
+ *
+ * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus
+ * 2) scif_setup_qp(..) allocates the local qp and calls
+ *	scif_setup_qp_connect(..) which allocates and maps the local
+ *	buffer for the inbound QP
+ * 3) The local node updates the device page with the DMA address of the QP
+ * 4) A delayed work is scheduled (qp_dwork) which periodically reads if
+ *	the peer node has updated its QP DMA address
+ * 5) Once a valid non zero address is found in the QP DMA address field
+ *	in the device page, the local node maps the remote node's QP,
+ *	updates its outbound QP and sends a SCIF_INIT message to the peer
+ * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom
+ *	half handler by calling scif_init(..)
+ * 7) scif_init(..) registers a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node
+ * 8) On the mgmt node, P2P network setup/teardown is initiated if all the
+ *	remote nodes are online via scif_p2p_setup(..)
+ * 9) For P2P setup, the host maps the remote nodes' aperture and memory
+ *	bars and sends a SCIF_NODE_ADD message to both nodes
+ * 10) As part of scif_nodeadd, both nodes set up their local inbound
+ *	QPs and send a SCIF_NODE_ADD_ACK to the mgmt node
+ * 11) As part of scif_node_add_ack(..) the mgmt node forwards the
+ *	SCIF_NODE_ADD_ACK to the remote nodes
+ * 12) As part of scif_node_add_ack(..) the remote nodes update their
+ *	outbound QPs, make sure they can access memory on the remote node
+ *	and then add a new SCIF peer node by calling
+ *	scif_peer_register_device(..) which signifies the addition of a new
+ *	SCIF node.
+ * 13) The SCIF network is now established across all nodes.
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by non mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) A non mgmt node now cleans up all local data structures and sends a
+ *	SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK
+ * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called
+ * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the
+ *	peers and waits for a SCIF_NODE_REMOVE_ACK
+ * 6) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it sends itself a node remove message whose handling cleans up local
+ *	data structures and unregisters the peer node from the SCIF network
+ * 8) The mgmt node sends a SCIF_EXIT_ACK
+ * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown
+ *	completes the SCIF remove routine
+ * 10) The SCIF network is now torn down for the node initiating the
+ *	teardown sequence
+ *
+ ************************************************************************
+ * SCIF node QP teardown flow (initiated by mgmt node):
+ *
+ * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus
+ * 2) The device page QP DMA address field is updated with 0x0
+ * 3) The mgmt node calls scif_disconnect_node(..)
+ * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers
+ *	and waits for a SCIF_NODE_REMOVE_ACK
+ * 5) As part of scif_node_remove(..) a remote node unregisters the peer
+ *	node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK
+ * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs
+ *	it unregisters the peer node from the SCIF network
+ * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK.
+ * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..)
+ *	which would clean up local data structures for all SCIF nodes and
+ *	then send a SCIF_EXIT_ACK back to the mgmt node
+ * 9) Upon receipt of the SCIF_EXIT_ACK the mgmt node sends itself a node
+ *	remove message whose handling cleans up local data structures and
+ *	destroys any P2P mappings.
+ * 10) The SCIF hardware device for which a remove callback was received is now
+ *	disconnected from the SCIF network.
+ */
+/*
+ * Initializes "local" data structures for the QP. Allocates the QP ring
+ * buffer (rb) and initializes the "in bound" queue. Frees the rb on error.
+ */
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev)
+{
+	void *local_q = qp->inbound_q.rb_base;
+	int err = 0;
+	u32 tmp_rd = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	/* Allocate rb only if not already allocated */
+	if (!local_q) {
+		local_q = kzalloc(local_size, GFP_KERNEL);
+		if (!local_q)
+			return -ENOMEM;
+	}
+
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &tmp_rd,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	/*
+	 * The read pointer is NULL initially and it is unsafe to use the ring
+	 * buffer until this changes!
+	 */
+	qp->inbound_q.read_ptr = NULL;
+	err = scif_map_single(qp_offset, qp,
+			      scifdev, sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+	/* Do not leave a stale rb pointer behind for a retry or a later free */
+	qp->inbound_q.rb_base = NULL;
+	return err;
+}
+
+/* When the other side has already done its allocation, this is called */
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev)
+{
+	void *local_q;
+	void *remote_q;
+	struct scif_qp *remote_qp;
+	int remote_size;
+	int err = 0;
+
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+	/* Start by figuring out where we need to point */
+	remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev);
+	if (!remote_qp)
+		return -EIO;
+	qp->remote_qp = remote_qp;
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_buf = remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev);
+	if (!remote_q) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->remote_qp->local_write = 0;
+	/*
+	 * To setup the outbound_q, the buffer lives in remote memory,
+	 * the read pointer is local, the write pointer is remote
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     remote_q,
+		     get_count_order(remote_size));
+	local_q = kzalloc(local_size, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto iounmap_1;
+	}
+	err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
+	if (err)
+		goto kfree;
+	qp->remote_qp->local_read = 0;
+	/*
+	 * To setup the inbound_q, the buffer lives locally, the read pointer
+	 * is remote and the write pointer is local
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(local_size));
+	err = scif_map_single(qp_offset, qp, scifdev,
+			      sizeof(struct scif_qp));
+	if (err)
+		goto unmap;
+	qp->local_qp = *qp_offset;
+	return err;
+unmap:
+	scif_unmap_single(qp->local_buf, scifdev, local_size);
+	qp->local_buf = 0;
+kfree:
+	kfree(local_q);
+iounmap_1:
+	scif_iounmap(remote_q, remote_size, scifdev);
+	qp->outbound_q.rb_base = NULL;
+iounmap:
+	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+	qp->remote_qp = NULL;
+	return err;
+}
+
+/* Map the peer's QP from @payload and finish setting up both ring buffers */
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload)
+{
+	int err;
+	void *r_buf;
+	int remote_size;
+	phys_addr_t tmp_phys;
+
+	qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev);
+	if (!qp->remote_qp)
+		return -ENOMEM;
+	if (qp->remote_qp->magic != SCIFEP_MAGIC) {
+		dev_err(&scifdev->sdev->dev,
+			"SCIFEP_MAGIC mismatch between self %d remote %d\n",
+			scif_dev[scif_info.nodeid].node, scifdev->node);
+		err = -ENODEV;
+		goto iounmap;
+	}
+	tmp_phys = qp->remote_qp->local_buf;
+	remote_size = qp->remote_qp->inbound_q.size;
+	r_buf = scif_ioremap(tmp_phys, remote_size, scifdev);
+	if (!r_buf) {
+		err = -EIO;
+		goto iounmap;
+	}
+	qp->local_read = 0;
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->remote_qp->local_write,
+		     r_buf,
+		     get_count_order(remote_size));
+	/*
+	 * Because the node QP may already be processing an INIT message, set
+	 * the read pointer so the cached read offset isn't lost
+	 */
+	qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
+	/*
+	 * resetup the inbound_q now that we know where the
+	 * inbound_read really is.
+	 */
+	scif_rb_init(&qp->inbound_q,
+		     &qp->remote_qp->local_read,
+		     &qp->local_write,
+		     qp->inbound_q.rb_base,
+		     get_count_order(qp->inbound_q.size));
+	return 0;
+iounmap:
+	/* Mirror scif_setup_qp_accept(): never keep a rejected peer QP mapped */
+	scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev);
+	qp->remote_qp = NULL;
+	return err;
+}
+
+/* Raise the message interrupt on @scifdev via the matching hw_ops hook */
+static __always_inline void scif_send_msg_intr(struct scif_dev *scifdev)
+{
+	struct scif_hw_dev *sdev = scifdev->sdev;
+
+	if (!scifdev_is_p2p(scifdev))
+		sdev->hw_ops->send_intr(sdev, scifdev->rdb);
+	else
+		sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio);
+}
+
+/* Complete the QP setup against @phys, then announce it with SCIF_INIT */
+int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev)
+{
+	struct scifmsg msg;
+	int err;
+
+	err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys);
+	if (err)
+		return err;
+	/*
+	 * Now that everything is setup and mapped, we're ready
+	 * to tell the peer about our queue's location
+	 */
+	msg.uop = SCIF_INIT;
+	msg.dst.node = scifdev->node;
+	return scif_nodeqp_send(scifdev, &msg);
+}
+
+/* Send SCIF_EXIT to @scifdev and wait (bounded) for the SCIF_EXIT_ACK */
+void scif_send_exit(struct scif_dev *scifdev)
+{
+	struct scifmsg msg;
+
+	scifdev->exit = OP_IN_PROGRESS;
+	msg.uop = SCIF_EXIT;
+	msg.src.node = scif_info.nodeid;
+	msg.dst.node = scifdev->node;
+
+	/* Only wait for a SCIF_EXIT_ACK message if the send went through */
+	if (!scif_nodeqp_send(scifdev, &msg))
+		wait_event_timeout(scif_info.exitwq,
+				   scifdev->exit == OP_COMPLETED,
+				   SCIF_NODE_ALIVE_TIMEOUT);
+
+	scifdev->exit = OP_IDLE;
+}
+
+/*
+ * scif_setup_qp:
+ * @scifdev: SCIF device whose node QP is created
+ * Allocate the node QP for @scifdev and set up its inbound ring buffer.
+ * Returns 0, -ENOMEM or the error from scif_setup_qp_connect(..).
+ */
+int scif_setup_qp(struct scif_dev *scifdev)
+{
+	struct scif_qp *qp;
+	int err;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	qp->magic = SCIFEP_MAGIC;
+	scifdev->qpairs = qp;
+	err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr,
+				    SCIF_NODE_QP_SIZE, scifdev);
+	if (err) {
+		kfree(scifdev->qpairs);
+		scifdev->qpairs = NULL;
+	}
+	/*
+	 * On success we're as setup as we can be. The inbound_q is setup, w/o
+	 * a usable outbound q.  When we get a message, the read_ptr will be
+	 * updated, and we will pull the message.
+	 */
+	return err;
+}
+
+static void scif_p2p_freesg(struct scatterlist *sg)
+{
+	kfree(sg);	/* a NULL @sg is fine: kfree() ignores it */
+}
+
+/*
+ * Build an sg table of @page_cnt chunks of @page_size bytes starting at @pa
+ */
+static struct scatterlist *
+scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt)
+{
+	struct scatterlist *sg;
+	int i;
+
+	sg = kcalloc(page_cnt, sizeof(*sg), GFP_KERNEL);
+	if (!sg)
+		return NULL;
+	sg_init_table(sg, page_cnt);
+	for (i = 0; i < page_cnt; i++, pa += page_size)
+		sg_set_page(&sg[i], pfn_to_page(pa >> PAGE_SHIFT),
+			    page_size, 0);
+	return sg;
+}
+
+/* Init p2p mappings required to access peerdev from scifdev; NULL on error */
+static struct scif_p2p_info *
+scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev)
+{
+	struct scif_hw_dev *psdev = peerdev->sdev;
+	struct scif_hw_dev *sdev = scifdev->sdev;
+	int num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT;
+	int num_aper_pages = psdev->aper->len >> PAGE_SHIFT;
+	int sg_page_shift, err, num_aper_chunks;
+	struct scif_p2p_info *p2p;
+
+	p2p = kzalloc(sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return NULL;
+	p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa,
+						    PAGE_SIZE, num_mmio_pages);
+	sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30)));
+	num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT);
+	p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa,
+						    1 << sg_page_shift,
+						    num_aper_chunks);
+	/* Either table may have failed to allocate; never map a NULL sg */
+	if (!p2p->ppi_sg[SCIF_PPI_MMIO] || !p2p->ppi_sg[SCIF_PPI_APER])
+		goto scif_p2p_free;
+	p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages;
+	p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			 num_mmio_pages, DMA_BIDIRECTIONAL);
+	if (err != num_mmio_pages)
+		goto scif_p2p_free;
+	err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			 num_aper_chunks, DMA_BIDIRECTIONAL);
+	if (err != num_aper_chunks)
+		goto dma_unmap;
+	p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]);
+	p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages;
+	p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages;
+	p2p->ppi_peer_id = peerdev->node;
+	return p2p;
+dma_unmap:
+	dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+scif_p2p_free:
+	/* kfree() accepts NULL, so a table that was never built is fine */
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+	return NULL;
+}
+
+/* Undo scif_init_p2p_info(): unmap both SG tables, then free them and p2p */
+static void scif_deinit_p2p_info(struct scif_dev *scifdev,
+				 struct scif_p2p_info *p2p)
+{
+	struct device *dev = &scifdev->sdev->dev;
+
+	dma_unmap_sg(dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+		     p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
+	dma_unmap_sg(dev, p2p->ppi_sg[SCIF_PPI_APER],
+		     p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+	scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+	kfree(p2p);
+}
+
+/**
+ * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
+ * @scifdev: Source node
+ * @dst: Destination node
+ *
+ * Connect src and dst over p2p; the management node acts as a proxy here.
+ */
+static void scif_node_connect(struct scif_dev *scifdev, int dst)
+{
+	struct scif_dev *dev_j = scifdev;
+	struct scif_dev *dev_i = NULL;
+	struct scif_p2p_info *p2p_ij = NULL;    /* bus addr for j from i */
+	struct scif_p2p_info *p2p_ji = NULL;    /* bus addr for i from j */
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	struct scifmsg msg;
+	int err;
+	u64 tmppayload;
+
+	if (dst < 1 || dst > scif_info.maxid)
+		return;
+
+	dev_i = &scif_dev[dst];
+
+	if (!_scifdev_alive(dev_i))
+		return;
+	/*
+	 * If the p2p connection is already setup or in the process of setting
+	 * up then just ignore this request. The requested node will get
+	 * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK
+	 */
+	if (!list_empty(&dev_i->p2p)) {
+		list_for_each_safe(pos, tmp, &dev_i->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == dev_j->node)
+				return;
+		}
+	}
+	p2p_ij = scif_init_p2p_info(dev_i, dev_j);
+	if (!p2p_ij)
+		return;
+	p2p_ji = scif_init_p2p_info(dev_j, dev_i);
+	if (!p2p_ji) {
+		scif_deinit_p2p_info(dev_i, p2p_ij);
+		return;
+	}
+	list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
+	list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
+
+	/*
+	 * Send a SCIF_NODE_ADD to dev_i, pass it its bus address
+	 * as seen from dev_j
+	 */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_j->node;
+	msg.dst.node = dev_i->node;
+
+	msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER];
+	msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER];
+	msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	err = scif_nodeqp_send(dev_i,  &msg);
+	if (err) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d error %d\n", __func__, __LINE__, err);
+		return;
+	}
+
+	/* Same as above but to dev_j */
+	msg.uop = SCIF_NODE_ADD;
+	msg.src.node = dev_i->node;
+	msg.dst.node = dev_j->node;
+
+	tmppayload = msg.payload[0];
+	msg.payload[0] = msg.payload[2];
+	msg.payload[2] = tmppayload;
+	msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO];
+	msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT;
+
+	scif_nodeqp_send(dev_j, &msg);
+}
+
+/* Connect every ordered pair of distinct nodes, if p2p is enabled */
+static void scif_p2p_setup(void)
+{
+	int src, dst;
+
+	if (!scif_info.p2p_enable)
+		return;
+
+	/* Bail out unless every node in 1..maxid passes _scifdev_alive() */
+	for (src = 1; src <= scif_info.maxid; src++)
+		if (!_scifdev_alive(&scif_dev[src]))
+			return;
+
+	for (src = 1; src <= scif_info.maxid; src++) {
+		for (dst = 1; dst <= scif_info.maxid; dst++) {
+			if (src == dst)
+				continue;
+			scif_node_connect(&scif_dev[src], dst);
+		}
+	}
+}
+
+static const char * const message_types[] = {	/* indexed by msg->uop */
+	"BAD",
+	"INIT",
+	"EXIT",
+	"SCIF_EXIT_ACK",
+	"SCIF_NODE_ADD",
+	"SCIF_NODE_ADD_ACK",
+	"SCIF_NODE_ADD_NACK",
+	"REMOVE_NODE",
+	"REMOVE_NODE_ACK",
+	"CNCT_REQ",
+	"CNCT_GNT",
+	"CNCT_GNTACK",
+	"CNCT_GNTNACK",
+	"CNCT_REJ",
+	"DISCNCT",
+	"DISCNT_ACK",
+	"CLIENT_SENT",
+	"CLIENT_RCVD",
+	"SCIF_GET_NODE_INFO",
+	"REGISTER",
+	"REGISTER_ACK",
+	"REGISTER_NACK",
+	"UNREGISTER",
+	"UNREGISTER_ACK",
+	"UNREGISTER_NACK",
+	"ALLOC_REQ",
+	"ALLOC_GNT",
+	"ALLOC_REJ",
+	"FREE_VIRT",
+	"MUNMAP",
+	"MARK",
+	"MARK_ACK",
+	"MARK_NACK",
+	"WAIT",
+	"WAIT_ACK",
+	"WAIT_NACK",
+	"SIGNAL_LOCAL",
+	"SIGNAL_REMOTE",
+	"SIG_ACK",
+	"SIG_NACK"};
+
+static void
+scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
+		     const char *label)
+{
+	if (!scif_info.en_msg_log)
+		return;
+	if (msg->uop > SCIF_MAX_MSG) {
+		dev_err(&scifdev->sdev->dev,
+			"%s: unknown msg type %d\n", label, msg->uop);
+		return;
+	}
+	dev_info(&scifdev->sdev->dev,
+		 "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n",
+		 label, message_types[msg->uop], msg->src.node, msg->src.port,
+		 msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1],
+		 msg->payload[2], msg->payload[3]);
+}
+
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_qp *qp = scifdev->qpairs;
+	int err = -ENOMEM, loop_cnt = 0;
+
+	scif_display_message(scifdev, msg, "Sent");
+	if (!qp) {
+		err = -EINVAL;
+		goto error;
+	}
+	spin_lock(&qp->send_lock);
+
+	while ((err = scif_rb_write(&qp->outbound_q,
+				    msg, sizeof(struct scifmsg)))) {
+		mdelay(1);
+#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000)
+		if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) {
+			err = -ENODEV;
+			break;
+		}
+	}
+	if (!err)
+		scif_rb_commit(&qp->outbound_q);
+	spin_unlock(&qp->send_lock);
+	if (!err) {
+		if (scifdev_self(scifdev))
+			/*
+			 * For loopback we need to emulate an interrupt by
+			 * queuing work for the queue handling real node
+			 * Qp interrupts.
+			 */
+			queue_work(scifdev->intr_wq, &scifdev->intr_bh);
+		else
+			scif_send_msg_intr(scifdev);
+	}
+error:
+	if (err)
+		dev_dbg(&scifdev->sdev->dev,
+			"%s %d error %d uop %d\n",
+			 __func__, __LINE__, err, msg->uop);
+	return err;
+}
+
+/**
+ * scif_nodeqp_send - Send a message on the node queue pair
+ * @scifdev: Scif Device.
+ * @msg: The message to be sent.
+ *
+ * A uop above SCIF_EXIT_ACK fails with -ENODEV once the exit flow has begun
+ * and is sent between scif_get_peer_dev() and scif_put_peer_dev().
+ */
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct device *spdev = NULL;
+	int err;
+
+	if (msg->uop > SCIF_EXIT_ACK) {
+		if (scifdev->exit != OP_IDLE)
+			return -ENODEV;
+		spdev = scif_get_peer_dev(scifdev);
+		if (IS_ERR(spdev))
+			return PTR_ERR(spdev);
+	}
+	err = _scif_nodeqp_send(scifdev, msg);
+	if (msg->uop > SCIF_EXIT_ACK)
+		scif_put_peer_dev(spdev);
+	return err;
+}
+
+/*
+ * scif_misc_handler:
+ *
+ * Work queue handler for servicing miscellaneous SCIF tasks.
+ * Examples include:
+ * 1) Remote fence requests.
+ * 2) Destruction of temporary registered windows
+ *    created during scif_vreadfrom()/scif_vwriteto().
+ * 3) Cleanup of zombie endpoints.
+ */
+void scif_misc_handler(struct work_struct *work)
+{
+	scif_rma_handle_remote_fences();
+	scif_rma_destroy_windows();
+	scif_rma_destroy_tcw_invalid();
+	scif_cleanup_zombie_epd();
+}
+
+/**
+ * scif_init() - Respond to SCIF_INIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ */
+static __always_inline void
+scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/*
+	 * Allow the thread waiting for device page updates for the peer QP DMA
+	 * address to complete initializing the inbound_q.
+	 */
+	flush_delayed_work(&scifdev->qp_dwork);
+
+	scif_peer_register_device(scifdev);
+
+	if (scif_is_mgmt_node()) {
+		mutex_lock(&scif_info.conflock);
+		scif_p2p_setup();
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+/**
+ * scif_exit() - Respond to SCIF_EXIT interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @unused:     Interrupt message (not used)
+ *
+ * This function stops the SCIF interface for the node which sent
+ * the SCIF_EXIT message and starts waiting for that node to
+ * set up the queue pair again.
+ */
+static __always_inline void
+scif_exit(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit_ack_pending = true;
+	if (scif_is_mgmt_node())
+		scif_disconnect_node(scifdev->node, false);
+	else
+		scif_stop(scifdev);
+	schedule_delayed_work(&scifdev->qp_dwork,
+			      msecs_to_jiffies(1000));
+}
+
+/**
+ * scif_exit_ack() - Respond to SCIF_EXIT_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @unused:     Interrupt message (not used)
+ *
+ */
+static __always_inline void
+scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused)
+{
+	scifdev->exit = OP_COMPLETED;
+	wake_up(&scif_info.exitwq);
+}
+
+/**
+ * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * When the mgmt node driver has finished initializing a MIC node queue pair it
+ * marks the node as online. It then looks for all currently online MIC cards
+ * and sends a SCIF_NODE_ADD message to identify the ID of the new card for
+ * peer to peer initialization.
+ *
+ * The local node allocates its incoming queue and sends its address in the
+ * SCIF_NODE_ADD_ACK message back to the mgmt node; the mgmt node "reflects"
+ * this message to the new node.
+ */
+static __always_inline void
+scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *newdev;
+	dma_addr_t qp_offset;
+	int qp_connect;
+	struct scif_hw_dev *sdev;
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d:%d received NODE_ADD msg for node %d\n",
+		scifdev->node, msg->dst.node, msg->src.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"Remote address for this node's aperture %llx\n",
+		msg->payload[0]);
+	newdev = &scif_dev[msg->src.node];
+	newdev->node = msg->src.node;
+	newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	sdev = newdev->sdev;
+
+	if (scif_setup_intr_wq(newdev)) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup interrupts for %d\n", msg->src.node);
+		goto interrupt_setup_error;
+	}
+	newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len);
+	if (!newdev->mmio.va) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to map mmio for %d\n", msg->src.node);
+		goto mmio_map_error;
+	}
+	newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL);
+	if (!newdev->qpairs)
+		goto qp_alloc_error;
+	/*
+	 * Set the base address of the remote node's memory since it gets
+	 * added to qp_offset
+	 */
+	newdev->base_addr = msg->payload[0];
+
+	qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset,
+					   SCIF_NODE_QP_SIZE, newdev);
+	if (qp_connect) {
+		dev_err(&scifdev->sdev->dev,
+			"failed to setup qp_connect %d\n", qp_connect);
+		goto qp_connect_error;
+	}
+
+	newdev->db = sdev->hw_ops->next_db(sdev);
+	newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler,
+						   "SCIF_INTR", newdev,
+						   newdev->db);
+	if (IS_ERR(newdev->cookie))
+		goto qp_connect_error;
+	newdev->qpairs->magic = SCIFEP_MAGIC;
+	newdev->qpairs->qp_state = SCIF_QP_OFFLINE;
+
+	msg->uop = SCIF_NODE_ADD_ACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	msg->payload[0] = qp_offset;
+	msg->payload[2] = newdev->db;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+	return;
+qp_connect_error:
+	kfree(newdev->qpairs);
+	newdev->qpairs = NULL;
+qp_alloc_error:
+	iounmap(newdev->mmio.va);
+	newdev->mmio.va = NULL;
+mmio_map_error:
+interrupt_setup_error:
+	dev_err(&scifdev->sdev->dev,
+		"node add failed for node %d\n", msg->src.node);
+	msg->uop = SCIF_NODE_ADD_NACK;
+	msg->dst.node = msg->src.node;
+	msg->src.node = scif_info.nodeid;
+	scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg);
+}
+
+void scif_poll_qp_state(struct work_struct *work)
+{
+#define SCIF_NODE_QP_RETRY 100
+#define SCIF_NODE_QP_TIMEOUT 100
+	struct scif_dev *peerdev = container_of(work, struct scif_dev,
+							p2p_dwork.work);
+	struct scif_qp *qp = &peerdev->qpairs[0];
+
+	if (qp->qp_state != SCIF_QP_ONLINE ||
+	    qp->remote_qp->qp_state != SCIF_QP_ONLINE) {
+		if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) {
+			dev_err(&peerdev->sdev->dev,
+				"Warning: QP check timeout with state %d\n",
+				qp->qp_state);
+			goto timeout;
+		}
+		schedule_delayed_work(&peerdev->p2p_dwork,
+				      msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
+		return;
+	}
+	return;
+timeout:
+	dev_err(&peerdev->sdev->dev,
+		"%s %d remote node %d offline,  state = 0x%x\n",
+		__func__, __LINE__, peerdev->node, qp->qp_state);
+	qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
+	scif_peer_unregister_device(peerdev);
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message
+ * @scifdev:    Remote SCIF device node
+ * @msg:        Interrupt message
+ *
+ * After a MIC node receives the SCIF_NODE_ADD_ACK message it sends this
+ * message to the mgmt node to confirm the sequence is finished.
+ *
+ */
+static __always_inline void
+scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *peerdev;
+	struct scif_qp *qp;
+	struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+	dev_dbg(&scifdev->sdev->dev,
+		"Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n",
+		scifdev->node, msg->src.node, msg->dst.node);
+	dev_dbg(&scifdev->sdev->dev,
+		"payload %llx %llx %llx %llx\n", msg->payload[0],
+		msg->payload[1], msg->payload[2], msg->payload[3]);
+	if (scif_is_mgmt_node()) {
+		/*
+		 * the lock serializes with scif_qp_response_ack. The mgmt node
+		 * is forwarding the NODE_ADD_ACK message from src to dst we
+		 * need to make sure that the dst has already received a
+		 * NODE_ADD for src and setup its end of the qp to dst
+		 */
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		scif_nodeqp_send(dst_dev, msg);
+		mutex_unlock(&scif_info.conflock);
+		return;
+	}
+	peerdev = &scif_dev[msg->src.node];
+	peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev;
+	peerdev->node = msg->src.node;
+
+	qp = &peerdev->qpairs[0];
+
+	if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0],
+					    msg->payload[0])))
+		goto local_error;
+	peerdev->rdb = msg->payload[2];
+	qp->remote_qp->qp_state = SCIF_QP_ONLINE;
+
+	scif_peer_register_device(peerdev);
+
+	schedule_delayed_work(&peerdev->p2p_dwork, 0);
+	return;
+local_error:
+	scif_cleanup_scifdev(peerdev);
+}
+
+/**
+ * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message
+ * @msg:        Interrupt message
+ *
+ * SCIF_NODE_ADD failed, so inform the waiting wq.
+ */
+static __always_inline void
+scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		struct scif_dev *dst_dev = &scif_dev[msg->dst.node];
+
+		dev_dbg(&scifdev->sdev->dev,
+			"SCIF_NODE_ADD_NACK received from %d\n", scifdev->node);
+		scif_nodeqp_send(dst_dev, msg);
+	}
+}
+
+/*
+ * scif_node_remove: Handle SCIF_NODE_REMOVE message
+ * @msg: Interrupt message
+ *
+ * Handle node removal.
+ */
+static __always_inline void
+scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int node = msg->payload[0];
+	struct scif_dev *scdev = &scif_dev[node];
+
+	scdev->node_remove_ack_pending = true;
+	scif_handle_remove_node(node);
+}
+
+/*
+ * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message
+ * @msg: Interrupt message
+ *
+ * The peer has acked a SCIF_NODE_REMOVE message.
+ */
+static __always_inline void
+scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_dev *sdev = &scif_dev[msg->payload[0]];
+
+	atomic_inc(&sdev->disconn_rescnt);
+	wake_up(&sdev->disconn_wq);
+}
+
+/**
+ * scif_get_node_info_resp: Respond to SCIF_GET_NODE_INFO interrupt message
+ * @msg:        Interrupt message
+ *
+ * Retrieve node info, i.e. maxid and total, from the mgmt node.
+ */
+static __always_inline void
+scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	if (scif_is_mgmt_node()) {
+		swap(msg->dst.node, msg->src.node);
+		mutex_lock(&scif_info.conflock);
+		msg->payload[1] = scif_info.maxid;
+		msg->payload[2] = scif_info.total;
+		mutex_unlock(&scif_info.conflock);
+		scif_nodeqp_send(scifdev, msg);
+	} else {
+		struct completion *node_info =
+			(struct completion *)msg->payload[3];
+
+		mutex_lock(&scif_info.conflock);
+		scif_info.maxid = msg->payload[1];
+		scif_info.total = msg->payload[2];
+		complete_all(node_info);
+		mutex_unlock(&scif_info.conflock);
+	}
+}
+
+static void
+scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	/* Slot 0 of scif_intr_func[]: a bogus Node Qp message */
+	dev_err(&scifdev->sdev->dev,
+		"Unknown message 0x%x scifdev->node 0x%x\n",
+		msg->uop, scifdev->node);
+}
+
+static void (*scif_intr_func[SCIF_MAX_MSG + 1])
+	    (struct scif_dev *, struct scifmsg *msg) = {
+	scif_msg_unknown,	/* Error */
+	scif_init,		/* SCIF_INIT */
+	scif_exit,		/* SCIF_EXIT */
+	scif_exit_ack,		/* SCIF_EXIT_ACK */
+	scif_node_add,		/* SCIF_NODE_ADD */
+	scif_node_add_ack,	/* SCIF_NODE_ADD_ACK */
+	scif_node_add_nack,	/* SCIF_NODE_ADD_NACK */
+	scif_node_remove,	/* SCIF_NODE_REMOVE */
+	scif_node_remove_ack,	/* SCIF_NODE_REMOVE_ACK */
+	scif_cnctreq,		/* SCIF_CNCT_REQ */
+	scif_cnctgnt,		/* SCIF_CNCT_GNT */
+	scif_cnctgnt_ack,	/* SCIF_CNCT_GNTACK */
+	scif_cnctgnt_nack,	/* SCIF_CNCT_GNTNACK */
+	scif_cnctrej,		/* SCIF_CNCT_REJ */
+	scif_discnct,		/* SCIF_DISCNCT */
+	scif_discnt_ack,	/* SCIF_DISCNT_ACK */
+	scif_clientsend,	/* SCIF_CLIENT_SENT */
+	scif_clientrcvd,	/* SCIF_CLIENT_RCVD */
+	scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
+	scif_recv_reg,		/* SCIF_REGISTER */
+	scif_recv_reg_ack,	/* SCIF_REGISTER_ACK */
+	scif_recv_reg_nack,	/* SCIF_REGISTER_NACK */
+	scif_recv_unreg,	/* SCIF_UNREGISTER */
+	scif_recv_unreg_ack,	/* SCIF_UNREGISTER_ACK */
+	scif_recv_unreg_nack,	/* SCIF_UNREGISTER_NACK */
+	scif_alloc_req,		/* SCIF_ALLOC_REQ */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_GNT */
+	scif_alloc_gnt_rej,	/* SCIF_ALLOC_REJ */
+	scif_free_virt,		/* SCIF_FREE_VIRT */
+	scif_recv_munmap,	/* SCIF_MUNMAP */
+	scif_recv_mark,		/* SCIF_MARK */
+	scif_recv_mark_resp,	/* SCIF_MARK_ACK */
+	scif_recv_mark_resp,	/* SCIF_MARK_NACK */
+	scif_recv_wait,		/* SCIF_WAIT */
+	scif_recv_wait_resp,	/* SCIF_WAIT_ACK */
+	scif_recv_wait_resp,	/* SCIF_WAIT_NACK */
+	scif_recv_sig_local,	/* SCIF_SIG_LOCAL */
+	scif_recv_sig_remote,	/* SCIF_SIG_REMOTE */
+	scif_recv_sig_resp,	/* SCIF_SIG_ACK */
+	scif_recv_sig_resp,	/* SCIF_SIG_NACK */
+};
+
+static int scif_max_msg_id = SCIF_MAX_MSG;
+
+/**
+ * scif_nodeqp_msg_handler() - Common handler for node messages
+ * @scifdev: Remote device to respond to
+ * @qp: Remote memory pointer
+ * @msg: The message to be handled.
+ *
+ * This routine logs the message and then dispatches it through
+ * scif_intr_func[], indexed by msg->uop.
+ */
+static void
+scif_nodeqp_msg_handler(struct scif_dev *scifdev,
+			struct scif_qp *qp, struct scifmsg *msg)
+{
+	scif_display_message(scifdev, msg, "Rcvd");
+
+	if (msg->uop > (u32)scif_max_msg_id) {
+		/* Out of range for scif_intr_func[]: bogus Node Qp message */
+		dev_err(&scifdev->sdev->dev,
+			"Unknown message 0x%x scifdev->node 0x%x\n",
+			msg->uop, scifdev->node);
+		return;
+	}
+
+	scif_intr_func[msg->uop](scifdev, msg);
+}
+
+/**
+ * scif_nodeqp_intrhandler() - Interrupt handler for node messages
+ * @scifdev:    Remote device to respond to
+ * @qp:         Remote memory pointer
+ *
+ * This routine is triggered by the interrupt mechanism.  It reads
+ * messages from the node queue RB and calls the Node QP Message handling
+ * routine.
+ *
+ * The loop ends when scif_rb_get_next() returns 0 or immediately after a
+ * SCIF_EXIT_ACK has been handled.
+ */
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	struct scifmsg msg;
+
+	/* Drain the inbound ring buffer one message at a time */
+	while (scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg))) {
+		scif_nodeqp_msg_handler(scifdev, qp, &msg);
+		/*
+		 * The node queue pair is unmapped so skip the read pointer
+		 * update after receipt of a SCIF_EXIT_ACK
+		 */
+		if (msg.uop == SCIF_EXIT_ACK)
+			break;
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	}
+}
+
+/**
+ * scif_loopb_wq_handler - Loopback Workqueue Handler.
+ * @work: loop back work
+ *
+ * This work queue routine is invoked by the loopback work queue handler.
+ * It grabs the recv lock, dequeues any available messages from the head
+ * of the loopback message list, calls the node QP message handler,
+ * waits for it to return, then frees up this message and dequeues more
+ * elements of the list if available.
+ */
+static void scif_loopb_wq_handler(struct work_struct *unused)
+{
+	struct scif_dev *scifdev = scif_info.loopb_dev;
+	struct scif_qp *qp = scifdev->qpairs;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = NULL;
+		spin_lock(&qp->recv_lock);
+		if (!list_empty(&scif_info.loopb_recv_q)) {
+			msg = list_first_entry(&scif_info.loopb_recv_q,
+					       struct scif_loopb_msg,
+					       list);
+			list_del(&msg->list);
+		}
+		spin_unlock(&qp->recv_lock);
+
+		if (msg) {
+			scif_nodeqp_msg_handler(scifdev, qp, &msg->msg);
+			kfree(msg);
+		}
+	} while (msg);
+}
+
+/**
+ * scif_loopb_msg_handler() - Workqueue handler for loopback messages.
+ * @scifdev: SCIF device
+ * @qp: Queue pair.
+ *
+ * This work queue routine is triggered when a loopback message is received.
+ *
+ * We need special handling for receiving Node Qp messages on a loopback SCIF
+ * device via two workqueues for receiving messages.
+ *
+ * The reason we need the extra workqueue which is not required with *normal*
+ * non-loopback SCIF devices is the potential classic deadlock described below:
+ *
+ * Thread A tries to send a message on a loopback SCIF device and blocks since
+ * there is no space in the RB while it has the send_lock held or another
+ * lock called lock X for example.
+ *
+ * Thread B: The Loopback Node QP message receive workqueue receives the message
+ * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries
+ * to grab the send lock again or lock X and deadlocks with Thread A. The RB
+ * cannot be drained any further due to this classic deadlock.
+ *
+ * In order to avoid deadlocks as mentioned above we have an extra level of
+ * indirection achieved by having two workqueues.
+ * 1) The first workqueue whose handler is scif_loopb_msg_handler reads
+ * messages from the Node QP RB, adds them to a list and queues work for the
+ * second workqueue.
+ *
+ * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues
+ * messages from the list, handles them, frees up the memory and dequeues
+ * more elements from the list if possible.
+ */
+int
+scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp)
+{
+	int read_size;
+	struct scif_loopb_msg *msg;
+
+	do {
+		msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
+		read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg,
+					     sizeof(struct scifmsg));
+		if (read_size != sizeof(struct scifmsg)) {
+			kfree(msg);
+			scif_rb_update_read_ptr(&qp->inbound_q);
+			break;
+		}
+		spin_lock(&qp->recv_lock);
+		list_add_tail(&msg->list, &scif_info.loopb_recv_q);
+		spin_unlock(&qp->recv_lock);
+		queue_work(scif_info.loopb_wq, &scif_info.loopb_work);
+		scif_rb_update_read_ptr(&qp->inbound_q);
+	} while (read_size == sizeof(struct scifmsg));
+	return read_size;
+}
+
+/**
+ * scif_setup_loopback_qp - One time setup work for Loopback Node Qp.
+ * @scifdev: SCIF device
+ *
+ * Sets up the required loopback workqueues, queue pairs and ring buffers
+ */
+int scif_setup_loopback_qp(struct scif_dev *scifdev)
+{
+	int err = 0;
+	void *local_q;
+	struct scif_qp *qp;
+
+	err = scif_setup_intr_wq(scifdev);
+	if (err)
+		goto exit;
+	INIT_LIST_HEAD(&scif_info.loopb_recv_q);
+	snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname),
+		 "SCIF LOOPB %d", scifdev->node);
+	scif_info.loopb_wq =
+		alloc_ordered_workqueue(scif_info.loopb_wqname, 0);
+	if (!scif_info.loopb_wq) {
+		err = -ENOMEM;
+		goto destroy_intr;
+	}
+	INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler);
+	/* Allocate Self Qpair */
+	scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL);
+	if (!scifdev->qpairs) {
+		err = -ENOMEM;
+		goto destroy_loopb_wq;
+	}
+
+	qp = scifdev->qpairs;
+	qp->magic = SCIFEP_MAGIC;
+	spin_lock_init(&qp->send_lock);
+	spin_lock_init(&qp->recv_lock);
+
+	local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL);
+	if (!local_q) {
+		err = -ENOMEM;
+		goto free_qpairs;
+	}
+	/*
+	 * For loopback the inbound_q and outbound_q are essentially the same
+	 * since the Node sends a message on the loopback interface to the
+	 * outbound_q which is then received on the inbound_q.
+	 */
+	scif_rb_init(&qp->outbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+
+	scif_rb_init(&qp->inbound_q,
+		     &qp->local_read,
+		     &qp->local_write,
+		     local_q, get_count_order(SCIF_NODE_QP_SIZE));
+	scif_info.nodeid = scifdev->node;
+
+	scif_peer_register_device(scifdev);
+
+	scif_info.loopb_dev = scifdev;
+	return err;
+free_qpairs:
+	kfree(scifdev->qpairs);
+destroy_loopb_wq:
+	destroy_workqueue(scif_info.loopb_wq);
+destroy_intr:
+	scif_destroy_intr_wq(scifdev);
+exit:
+	return err;
+}
+
+/**
+ * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp
+ * @scifdev: SCIF device
+ *
+ * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory.
+ */
+int scif_destroy_loopback_qp(struct scif_dev *scifdev)
+{
+	scif_peer_unregister_device(scifdev);
+	destroy_workqueue(scif_info.loopb_wq);
+	scif_destroy_intr_wq(scifdev);
+	kfree(scifdev->qpairs->outbound_q.rb_base);
+	kfree(scifdev->qpairs);
+	scifdev->sdev = NULL;
+	scif_info.loopb_dev = NULL;
+	return 0;
+}
+
+void scif_destroy_p2p(struct scif_dev *scifdev)
+{
+	struct scif_dev *peer_dev;
+	struct scif_p2p_info *p2p;
+	struct list_head *pos, *tmp;
+	int bd;
+
+	mutex_lock(&scif_info.conflock);
+	/* Free P2P mappings in the given node for all its peer nodes */
+	list_for_each_safe(pos, tmp, &scifdev->p2p) {
+		p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
+			     p2p->sg_nentries[SCIF_PPI_MMIO],
+			     DMA_BIDIRECTIONAL);
+		dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
+			     p2p->sg_nentries[SCIF_PPI_APER],
+			     DMA_BIDIRECTIONAL);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+		scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+		list_del(pos);
+		kfree(p2p);
+	}
+
+	/* Free P2P mapping created in the peer nodes for the given node */
+	for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) {
+		peer_dev = &scif_dev[bd];
+		list_for_each_safe(pos, tmp, &peer_dev->p2p) {
+			p2p = list_entry(pos, struct scif_p2p_info, ppi_list);
+			if (p2p->ppi_peer_id == scifdev->node) {
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_MMIO],
+					     p2p->sg_nentries[SCIF_PPI_MMIO],
+					     DMA_BIDIRECTIONAL);
+				dma_unmap_sg(&peer_dev->sdev->dev,
+					     p2p->ppi_sg[SCIF_PPI_APER],
+					     p2p->sg_nentries[SCIF_PPI_APER],
+					     DMA_BIDIRECTIONAL);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
+				scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
+				list_del(pos);
+				kfree(p2p);
+			}
+		}
+	}
+	mutex_unlock(&scif_info.conflock);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_nodeqp.h b/kernel/drivers/misc/mic/scif/scif_nodeqp.h
new file mode 100644
index 000000000..958962731
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_nodeqp.h
@@ -0,0 +1,221 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_NODEQP
+#define SCIF_NODEQP
+
+#include "scif_rb.h"
+#include "scif_peer_bus.h"
+
+#define SCIF_INIT 1  /* First message sent to the peer node for discovery */
+#define SCIF_EXIT 2  /* Last message from the peer informing intent to exit */
+#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */
+#define SCIF_NODE_ADD 4  /* Tell online nodes a new node exists */
+#define SCIF_NODE_ADD_ACK 5  /* Confirm to mgmt node sequence is finished */
+#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */
+#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */
+#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */
+#define SCIF_CNCT_REQ 9  /* Phys addr of Request connection to a port */
+#define SCIF_CNCT_GNT 10  /* Phys addr of new Grant connection request */
+#define SCIF_CNCT_GNTACK 11  /* Error type Reject a connection request */
+#define SCIF_CNCT_GNTNACK 12  /* Error type Reject a connection request */
+#define SCIF_CNCT_REJ 13  /* Error type Reject a connection request */
+#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */
+#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */
+#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
+#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
+#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node */
+#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
+#define SCIF_REGISTER_ACK 20 /* Notify peer about registration success */
+#define SCIF_REGISTER_NACK 21 /* Notify peer about registration failure */
+#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
+#define SCIF_UNREGISTER_ACK 23 /* Notify peer about unregistration success */
+#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
+#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
+#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
+#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
+#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
+#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */
+#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
+#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
+#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
+#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
+#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
+#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
+#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
+#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
+#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
+#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
+#define SCIF_MAX_MSG SCIF_SIG_NACK
+
+/*
+ * struct scifmsg - Node QP message format
+ *
+ * @src: Source information
+ * @dst: Destination information
+ * @uop: The message opcode
+ * @payload: Unique payload format for each message
+ */
+struct scifmsg {
+	struct scif_port_id src;
+	struct scif_port_id dst;
+	u32 uop;
+	u64 payload[4];
+} __packed;
+
+/*
+ * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
+ * the remote node to allocate memory
+ *
+ * phys_addr: Physical address of the buffer
+ * vaddr: Virtual address of the buffer
+ * size: Size of the buffer
+ * state: Current state
+ * allocwq: wait queue for status
+ */
+struct scif_allocmsg {
+	dma_addr_t phys_addr;
+	unsigned long vaddr;
+	size_t size;
+	enum scif_msg_state state;
+	wait_queue_head_t allocwq;
+};
+
+/*
+ * struct scif_qp - Node Queue Pair
+ *
+ * Interesting structure -- a little difficult because we can only
+ * write across the PCIe, so any r/w pointer we need to read is
+ * local. We only need to read the read pointer on the inbound_q
+ * and read the write pointer in the outbound_q
+ *
+ * @magic: Magic value to ensure the peer sees the QP correctly
+ * @outbound_q: The outbound ring buffer for sending messages
+ * @inbound_q: The inbound ring buffer for receiving messages
+ * @local_write: Local write index
+ * @local_read: Local read index
+ * @remote_qp: The remote queue pair
+ * @local_buf: DMA address of local ring buffer
+ * @local_qp: DMA address of the local queue pair data structure
+ * @remote_buf: DMA address of remote ring buffer
+ * @qp_state: QP state i.e. online or offline used for P2P
+ * @send_lock: synchronize access to outbound queue
+ * @recv_lock: Synchronize access to inbound queue
+ */
+struct scif_qp {
+	u64 magic;
+#define SCIFEP_MAGIC 0x5c1f000000005c1fULL
+	struct scif_rb outbound_q;
+	struct scif_rb inbound_q;
+
+	u32 local_write __aligned(64);
+	u32 local_read __aligned(64);
+	struct scif_qp *remote_qp;
+	dma_addr_t local_buf;
+	dma_addr_t local_qp;
+	dma_addr_t remote_buf;
+	u32 qp_state;
+#define SCIF_QP_OFFLINE 0xdead
+#define SCIF_QP_ONLINE 0xc0de
+	spinlock_t send_lock;
+	spinlock_t recv_lock;
+};
+
+/*
+ * struct scif_loopb_msg - An element in the loopback Node QP message list.
+ *
+ * @msg - The SCIF node QP message
+ * @list - link in the list of messages
+ */
+struct scif_loopb_msg {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp);
+int scif_setup_qp(struct scif_dev *scifdev);
+int scif_qp_response(phys_addr_t phys, struct scif_dev *dev);
+int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
+			  int local_size, struct scif_dev *scifdev);
+int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset,
+			 dma_addr_t phys, int local_size,
+			 struct scif_dev *scifdev);
+int scif_setup_qp_connect_response(struct scif_dev *scifdev,
+				   struct scif_qp *qp, u64 payload);
+int scif_setup_loopback_qp(struct scif_dev *scifdev);
+int scif_destroy_loopback_qp(struct scif_dev *scifdev);
+void scif_poll_qp_state(struct work_struct *work);
+void scif_destroy_p2p(struct scif_dev *scifdev);
+void scif_send_exit(struct scif_dev *scifdev);
+/* Return the peer device with a reference held, or ERR_PTR(-ENODEV) */
+static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
+{
+	struct device *dev = ERR_PTR(-ENODEV);
+	struct scif_peer_dev *peer;
+
+	/* The reference is only taken inside the RCU read-side section */
+	rcu_read_lock();
+	peer = rcu_dereference(scifdev->spdev);
+	if (peer)
+		dev = get_device(&peer->dev);
+	rcu_read_unlock();
+	return dev;
+}
+
+static inline void scif_put_peer_dev(struct device *dev)
+{
+	put_device(dev);
+}
+#endif  /* SCIF_NODEQP */
diff --git a/kernel/drivers/misc/mic/scif/scif_peer_bus.c b/kernel/drivers/misc/mic/scif/scif_peer_bus.c
new file mode 100644
index 000000000..6ffa3bdbd
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_peer_bus.c
@@ -0,0 +1,183 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#include "scif_main.h"
+#include "../bus/scif_bus.h"
+#include "scif_peer_bus.h"
+
+static inline struct scif_peer_dev *
+dev_to_scif_peer(struct device *dev)
+{
+	return container_of(dev, struct scif_peer_dev, dev);
+}
+
+struct bus_type scif_peer_bus = {
+	.name  = "scif_peer_bus",
+};
+
+static void scif_peer_release_dev(struct device *d)
+{
+	struct scif_peer_dev *sdev = dev_to_scif_peer(d);
+	struct scif_dev *scifdev = &scif_dev[sdev->dnode];
+
+	scif_cleanup_scifdev(scifdev);
+	kfree(sdev);
+}
+
+static int scif_peer_initialize_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+	int ret;
+
+	spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
+	if (!spdev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	spdev->dev.parent = scifdev->sdev->dev.parent;
+	spdev->dev.release = scif_peer_release_dev;
+	spdev->dnode = scifdev->node;
+	spdev->dev.bus = &scif_peer_bus;
+	dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
+
+	device_initialize(&spdev->dev);
+	get_device(&spdev->dev);
+	rcu_assign_pointer(scifdev->spdev, spdev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total++;
+	scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+err:
+	dev_err(&scifdev->sdev->dev,
+		"dnode %d: initialize_device rc %d\n", scifdev->node, ret);
+	return ret;
+}
+
+static int scif_peer_add_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
+	char pool_name[16];
+	int ret;
+
+	ret = device_add(&spdev->dev);
+	put_device(&spdev->dev);
+	if (ret) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: peer device_add failed\n", scifdev->node);
+		goto put_spdev;
+	}
+
+	scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
+	scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
+						sizeof(struct scif_status), 1,
+						0);
+	if (!scifdev->signal_pool) {
+		dev_err(&scifdev->sdev->dev,
+			"dnode %d: dmam_pool_create failed\n", scifdev->node);
+		ret = -ENOMEM;
+		goto del_spdev;
+	}
+	dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
+	return 0;
+del_spdev:
+	device_del(&spdev->dev);
+put_spdev:
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	put_device(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return ret;
+}
+
+void scif_add_peer_device(struct work_struct *work)
+{
+	struct scif_dev *scifdev = container_of(work, struct scif_dev,
+						peer_add_work);
+
+	scif_peer_add_device(scifdev);
+}
+
+/*
+ * Peer device registration is split into a device_initialize and a device_add.
+ * The reason for doing this is as follows: First, peer device registration
+ * itself cannot be done in the message processing thread and must be delegated
+ * to another workqueue, otherwise if SCIF client probe, called during peer
+ * device registration, calls scif_connect(..), it will block the message
+ * processing thread causing a deadlock. Next, device_initialize is done in the
+ * "top-half" message processing thread and device_add in the "bottom-half"
+ * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
+ * concurrently with SCIF_INIT message processing is unable to get a reference
+ * on the peer device, thereby failing the connect request.
+ */
+void scif_peer_register_device(struct scif_dev *scifdev)
+{
+	mutex_lock(&scifdev->lock);
+	/*
+	 * "Top half": initialize the peer device right away, then hand the
+	 * remaining work to peer_add_work unless initialization failed.
+	 * The error is not propagated since this function returns void.
+	 */
+	if (!scif_peer_initialize_device(scifdev))
+		schedule_work(&scifdev->peer_add_work);
+	mutex_unlock(&scifdev->lock);
+}
+
+/* Detach and unregister the peer device; -ENODEV if none is registered */
+int scif_peer_unregister_device(struct scif_dev *scifdev)
+{
+	struct scif_peer_dev *spdev;
+
+	mutex_lock(&scifdev->lock);
+	/* Flush work to ensure device register is complete */
+	flush_work(&scifdev->peer_add_work);
+	/*
+	 * Concurrent unregisters are serialized by scifdev->lock, so this is
+	 * an update-side access under that lock, not an RCU read section
+	 */
+	spdev = rcu_dereference_protected(scifdev->spdev,
+					  lockdep_is_held(&scifdev->lock));
+	if (!spdev) {
+		mutex_unlock(&scifdev->lock);
+		return -ENODEV;
+	}
+	RCU_INIT_POINTER(scifdev->spdev, NULL);
+	synchronize_rcu();
+	mutex_unlock(&scifdev->lock);
+
+	dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
+	device_unregister(&spdev->dev);
+
+	mutex_lock(&scif_info.conflock);
+	scif_info.total--;
+	mutex_unlock(&scif_info.conflock);
+	return 0;
+}
+
+int scif_peer_bus_init(void)
+{
+	return bus_register(&scif_peer_bus);
+}
+
+void scif_peer_bus_exit(void)
+{
+	bus_unregister(&scif_peer_bus);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_peer_bus.h b/kernel/drivers/misc/mic/scif/scif_peer_bus.h
new file mode 100644
index 000000000..a3b8dd2ed
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_peer_bus.h
@@ -0,0 +1,31 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef _SCIF_PEER_BUS_H_
+#define _SCIF_PEER_BUS_H_
+
+#include <linux/device.h>
+#include <linux/mic_common.h>
+#include <linux/scif.h>
+
+struct scif_dev;
+
+void scif_add_peer_device(struct work_struct *work);
+void scif_peer_register_device(struct scif_dev *sdev);
+int scif_peer_unregister_device(struct scif_dev *scifdev);
+int scif_peer_bus_init(void);
+void scif_peer_bus_exit(void);
+#endif /* _SCIF_PEER_BUS_H_ */
diff --git a/kernel/drivers/misc/mic/scif/scif_ports.c b/kernel/drivers/misc/mic/scif/scif_ports.c
new file mode 100644
index 000000000..594e18d27
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_ports.c
@@ -0,0 +1,124 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/idr.h>
+
+#include "scif_main.h"
+
+#define SCIF_PORT_COUNT	0x10000	/* Ports available */
+
+struct idr scif_ports;
+
+/*
+ * struct scif_port - SCIF port information
+ *
+ * @ref_cnt - Reference count since there can be multiple endpoints
+ *		created via scif_accept(..) simultaneously using a port.
+ */
+struct scif_port {
+	int ref_cnt;
+};
+
+/**
+ * __scif_get_port - Reserve a free port # for SCIF and add it to the
+ * global list with a reference count of one.
+ * @start : lowest acceptable port # (inclusive).
+ * @end : end of the acceptable port # range (exclusive).
+ * @return : Allocated port #, -ENOSPC if none is free or -ENOMEM on OOM.
+ */
+static int __scif_get_port(int start, int end)
+{
+	int id;
+	struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC);
+
+	if (!port)
+		return -ENOMEM;
+	port->ref_cnt = 1;
+	spin_lock(&scif_info.port_lock);
+	id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC);
+	spin_unlock(&scif_info.port_lock);
+	if (id < 0)
+		kfree(port);	/* never inserted in the idr: free, don't leak */
+	return id;
+}
+
+/**
+ * scif_rsrv_port - Reserve a specified port # for SCIF.
+ * @port : port # to be reserved.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if port unavailable.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_rsrv_port(u16 port)
+{
+	return __scif_get_port(port, port + 1);
+}
+
+/**
+ * scif_get_new_port - Get and reserve any port # for SCIF in the range
+ *			SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1.
+ *
+ * @return : Allocated SCIF port #, or -ENOSPC if no ports available.
+ *		On memory allocation failure, returns -ENOMEM.
+ */
+int scif_get_new_port(void)
+{
+	return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT);
+}
+
+/**
+ * scif_get_port - Increment the reference count for a SCIF port
+ * @id : SCIF port
+ *
+ * @return : None
+ */
+void scif_get_port(u16 id)
+{
+	struct scif_port *port;
+
+	if (!id)
+		return;
+	spin_lock(&scif_info.port_lock);
+	port = idr_find(&scif_ports, id);
+	if (port)
+		port->ref_cnt++;
+	spin_unlock(&scif_info.port_lock);
+}
+
+/**
+ * scif_put_port - Release a reserved SCIF port
+ * @id : SCIF port to be released.
+ *
+ * @return : None
+ */
+void scif_put_port(u16 id)
+{
+	struct scif_port *entry;
+
+	/* Port 0 is ignored here, just as in scif_get_port() */
+	if (!id)
+		return;
+
+	spin_lock(&scif_info.port_lock);
+	entry = idr_find(&scif_ports, id);
+	/* Dropping the last reference also removes the port from the idr */
+	if (entry && --entry->ref_cnt == 0) {
+		idr_remove(&scif_ports, id);
+		kfree(entry);
+	}
+	spin_unlock(&scif_info.port_lock);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_rb.c b/kernel/drivers/misc/mic/scif/scif_rb.c
new file mode 100644
index 000000000..637cc4686
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rb.c
@@ -0,0 +1,249 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/circ_buf.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+
+#include "scif_rb.h"
+
+#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size)
+#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size)
+
+/**
+ * scif_rb_init - Initializes the ring buffer and its cached read/write offsets
+ * @rb: ring buffer
+ * @read_ptr: A pointer to the read offset
+ * @write_ptr: A pointer to the write offset
+ * @rb_base: A pointer to the base of the ring buffer
+ * @size: log2 of the ring buffer size, i.e. the ring spans 1 << @size bytes
+ */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size)
+{
+	rb->rb_base = rb_base;
+	rb->size = (1 << size);
+	rb->read_ptr = read_ptr;
+	rb->write_ptr = write_ptr;
+	rb->current_read_offset = *read_ptr;
+	rb->current_write_offset = *write_ptr;
+}
+
+/* Copies a message to the ring buffer -- handles the wrap around case */
+static void memcpy_torb(struct scif_rb *rb, void *header,
+			void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_toio((void __iomem __force *)header, msg, size1);
+		memcpy_toio((void __iomem __force *)rb->rb_base,
+			    msg + size1, size2);
+	} else {
+		memcpy_toio((void __iomem __force *)header, msg, size);
+	}
+}
+
+/* Copies a message from the ring buffer -- handles the wrap around case */
+static void memcpy_fromrb(struct scif_rb *rb, void *header,
+			  void *msg, u32 size)
+{
+	u32 size1, size2;
+
+	if (header + size >= rb->rb_base + rb->size) {
+		/* Need to call two copies if it wraps around */
+		size1 = (u32)(rb->rb_base + rb->size - header);
+		size2 = size - size1;
+		memcpy_fromio(msg, (void __iomem __force *)header, size1);
+		memcpy_fromio(msg + size1,
+			      (void __iomem __force *)rb->rb_base, size2);
+	} else {
+		memcpy_fromio(msg, (void __iomem __force *)header, size);
+	}
+}
+
+/**
+ * scif_rb_space - Query space available for writing to the RB
+ * @rb: ring buffer
+ *
+ * Return: size available for writing to RB in bytes.
+ */
+u32 scif_rb_space(struct scif_rb *rb)
+{
+	rb->current_read_offset = *rb->read_ptr;
+	/*
+	 * Update from the HW read pointer only once the peer has exposed the
+	 * new empty slot. This barrier is paired with the memory barrier in
+	 * scif_rb_update_read_ptr()
+	 */
+	mb();
+	return scif_rb_ring_space(rb->current_write_offset,
+				  rb->current_read_offset, rb->size);
+}
+
+/**
+ * scif_rb_write - Write a message to the RB
+ * @rb: ring buffer
+ * @msg: buffer to send the message.  Must be at least size bytes long
+ * @size: the size (in bytes) to be copied to the RB
+ *
+ * This API does not block if there isn't enough space in the RB.
+ * Returns: 0 on success or -ENOMEM on failure
+ */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *header;
+
+	if (scif_rb_space(rb) < size)
+		return -ENOMEM;
+	header = rb->rb_base + rb->current_write_offset;
+	memcpy_torb(rb, header, msg, size);
+	/*
+	 * Wait until scif_rb_commit(). Update the local ring
+	 * buffer data, not the shared data until commit.
+	 */
+	rb->current_write_offset =
+		(rb->current_write_offset + size) & (rb->size - 1);
+	return 0;
+}
+
+/**
+ * scif_rb_commit - Publish the written messages so the peer can fetch them
+ * @rb: ring buffer
+ */
+void scif_rb_commit(struct scif_rb *rb)
+{
+	/*
+	 * We must ensure ordering between all the data committed
+	 * previously before we expose the new message to the peer by
+	 * updating the write_ptr. This write barrier is paired with
+	 * the read barrier in scif_rb_count(..)
+	 */
+	wmb();
+	ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	ACCESS_ONCE(*rb->write_ptr) = rb->current_write_offset;
+#endif
+}
+
+/**
+ * scif_rb_get - To get next message from the ring buffer
+ * @rb: ring buffer
+ * @size: Number of bytes to be read
+ *
+ * Return: NULL if no bytes to be read from the ring buffer, otherwise the
+ *	pointer to the next byte
+ */
+static void *scif_rb_get(struct scif_rb *rb, u32 size)
+{
+	void *header = NULL;
+
+	if (scif_rb_count(rb, size) >= size)
+		header = rb->rb_base + rb->current_read_offset;
+	return header;
+}
+
+/*
+ * scif_rb_get_next - Read from ring buffer.
+ * @rb: ring buffer
+ * @msg: buffer to hold the message.  Must be at least size bytes long
+ * @size: Number of bytes to be read
+ *
+ * Return: number of bytes read if available bytes are >= size, otherwise
+ * returns zero.
+ */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size)
+{
+	void *src = scif_rb_get(rb, size);
+
+	/* Fewer than @size bytes are readable: consume nothing */
+	if (!src)
+		return 0;
+	/*
+	 * Move the cached read offset past the message (rb->size is a power
+	 * of two, so the mask wraps it) and then copy the message out.
+	 */
+	rb->current_read_offset =
+		(rb->current_read_offset + size) & (rb->size - 1);
+	memcpy_fromrb(rb, src, msg, size);
+	return size;
+}
+
+/**
+ * scif_rb_update_read_ptr - Publish the cached read offset via *read_ptr
+ * @rb: ring buffer
+ */
+void scif_rb_update_read_ptr(struct scif_rb *rb)
+{
+	u32 new_offset;
+
+	new_offset = rb->current_read_offset;
+	/*
+	 * We must ensure ordering between all the data committed or read
+	 * previously before we expose the empty slot to the peer by updating
+	 * the read_ptr. This barrier is paired with the memory barrier in
+	 * scif_rb_space(..)
+	 */
+	mb();
+	ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#ifdef CONFIG_INTEL_MIC_CARD
+	/*
+	 * X100 Si Bug: For the case where a Core is performing an EXT_WR
+	 * followed by a Doorbell Write, the Core must perform two EXT_WR to the
+	 * same address with the same data before it does the Doorbell Write.
+	 * This way, if ordering is violated for the Interrupt Message, it will
+	 * fall just behind the first Posted associated with the first EXT_WR.
+	 */
+	ACCESS_ONCE(*rb->read_ptr) = new_offset;
+#endif
+}
+
+/**
+ * scif_rb_count - Count readable bytes; re-reads *write_ptr if below @size
+ * @rb: ring buffer
+ * @size: Number of bytes expected to be read
+ *
+ * Return: number of bytes that can be read from the RB
+ */
+u32 scif_rb_count(struct scif_rb *rb, u32 size)
+{
+	if (scif_rb_ring_cnt(rb->current_write_offset,
+			     rb->current_read_offset,
+			     rb->size) < size) {
+		rb->current_write_offset = *rb->write_ptr;
+		/*
+		 * Update from the HW write pointer if empty only once the peer
+		 * has exposed the new message. This read barrier is paired
+		 * with the write barrier in scif_rb_commit(..)
+		 */
+		smp_rmb();
+	}
+	return scif_rb_ring_cnt(rb->current_write_offset,
+				rb->current_read_offset,
+				rb->size);
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_rb.h b/kernel/drivers/misc/mic/scif/scif_rb.h
new file mode 100644
index 000000000..166dffe30
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rb.h
@@ -0,0 +1,100 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ */
+#ifndef SCIF_RB_H
+#define SCIF_RB_H
+/*
+ * This file describes a general purpose, byte based ring buffer. Writers to the
+ * ring buffer need to synchronize using a lock. The same is true for readers,
+ * although in practice, the ring buffer has a single reader. It is lockless
+ * between producer and consumer so it can handle being used across the PCIe
+ * bus. The ring buffer ensures that there are no reads across the PCIe bus for
+ * performance reasons. Two of these are used to form a single bidirectional
+ * queue-pair across PCIe.
+ */
+/*
+ * struct scif_rb - SCIF Ring Buffer
+ *
+ * @rb_base: The base of the memory used for storing RB messages
+ * @read_ptr: Pointer to the read offset
+ * @write_ptr: Pointer to the write offset
+ * @size: Size of the memory in rb_base
+ * @current_read_offset: Cached read offset for performance
+ * @current_write_offset: Cached write offset for performance
+ */
+struct scif_rb {
+	void *rb_base;
+	u32 *read_ptr;
+	u32 *write_ptr;
+	u32 size;
+	u32 current_read_offset;
+	u32 current_write_offset;
+};
+
+/* methods used by both */
+void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr,
+		  void *rb_base, u8 size);
+/* writer only methods */
+/* write a new command, then scif_rb_commit() */
+int scif_rb_write(struct scif_rb *rb, void *msg, u32 size);
+/* after write(), then scif_rb_commit() */
+void scif_rb_commit(struct scif_rb *rb);
+/* query space available for writing to a RB. */
+u32 scif_rb_space(struct scif_rb *rb);
+
+/* reader only methods */
+/* read a new message from the ring buffer of size bytes */
+u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size);
+/* update the read pointer so that the space can be reused */
+void scif_rb_update_read_ptr(struct scif_rb *rb);
+/* count the number of bytes that can be read */
+u32 scif_rb_count(struct scif_rb *rb, u32 size);
+#endif
diff --git a/kernel/drivers/misc/mic/scif/scif_rma.c b/kernel/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000..8310b4dbf
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include <linux/dma_remapping.h>
+#include <linux/pagemap.h>
+#include "scif_main.h"
+#include "scif_map.h"
+
+/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
+#define SCIF_MAP_ULIMIT 0x40
+
+bool scif_ulimit_check = 1; /* gates the mm->pinned_vm accounting below */
+
+/**
+ * scif_rma_ep_init:
+ * @ep: end point
+ *
+ * Bring the RMA state embedded in an endpoint to its initial, idle state.
+ */
+void scif_rma_ep_init(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *info = &ep->rma_info;
+
+	/* Locks and the wait queue */
+	mutex_init(&info->rma_lock);
+	mutex_init(&info->mmn_lock);
+	spin_lock_init(&info->tc_lock);
+	init_waitqueue_head(&info->markwq);
+
+	/* Offset space handed out to the windows of this endpoint */
+	init_iova_domain(&info->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
+			 SCIF_DMA_64BIT_PFN);
+
+	/* All lists start out empty */
+	INIT_LIST_HEAD(&info->reg_list);
+	INIT_LIST_HEAD(&info->remote_reg_list);
+	INIT_LIST_HEAD(&info->mmn_list);
+	INIT_LIST_HEAD(&info->vma_list);
+
+	/* No outstanding references and no DMA channel yet */
+	atomic_set(&info->tw_refcount, 0);
+	atomic_set(&info->tcw_refcount, 0);
+	atomic_set(&info->tcw_total_pages, 0);
+	atomic_set(&info->fence_refcount, 0);
+	info->async_list_del = 0;
+	info->dma_chan = NULL;
+}
+
+/**
+ * scif_rma_ep_can_uninit:
+ * @ep: end point
+ *
+ * Checked under the RMA lock. Returns 1 when the registration, remote
+ * registration and mmn lists are all empty and the tw, tcw and fence
+ * reference counts are all zero; returns 0 otherwise.
+ */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep)
+{
+	struct scif_endpt_rma_info *rma = &ep->rma_info;
+	int idle;
+
+	mutex_lock(&rma->rma_lock);
+	/* Every list must be empty and every reference dropped */
+	idle = list_empty(&rma->reg_list) &&
+	       list_empty(&rma->remote_reg_list) &&
+	       list_empty(&rma->mmn_list) &&
+	       !atomic_read(&rma->tw_refcount) &&
+	       !atomic_read(&rma->tcw_refcount) &&
+	       !atomic_read(&rma->fence_refcount);
+	mutex_unlock(&rma->rma_lock);
+	return idle;
+}
+
+/**
+ * scif_create_pinned_pages:
+ * @nr_pages: number of pages in window
+ * @prot: read/write protection
+ *
+ * Allocate a zeroed pinned pages descriptor with room for @nr_pages page
+ * pointers. Returns the descriptor, or NULL if an allocation fails.
+ */
+static struct scif_pinned_pages *
+scif_create_pinned_pages(int nr_pages, int prot)
+{
+	struct scif_pinned_pages *pinned;
+
+	might_sleep();
+	pinned = scif_zalloc(sizeof(*pinned));
+	if (!pinned)
+		return NULL;
+
+	pinned->pages = scif_zalloc(nr_pages * sizeof(*pinned->pages));
+	if (!pinned->pages) {
+		/* Undo the descriptor allocation before bailing out */
+		scif_free(pinned, sizeof(*pinned));
+		return NULL;
+	}
+
+	pinned->magic = SCIFEP_MAGIC;
+	pinned->prot = prot;
+	return pinned;
+}
+
+/**
+ * scif_destroy_pinned_pages:
+ * @pin: A set of pinned pages.
+ *
+ * Drop the page references held by @pin (unless the set carries
+ * SCIF_MAP_KERNEL), marking each page dirty first when the set was
+ * writeable, then free the page array and @pin itself. Always returns 0.
+ */
+static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
+{
+	int idx;
+	int is_kernel = pin->map_flags & SCIF_MAP_KERNEL;
+	int dirty = pin->prot & SCIF_PROT_WRITE;
+
+	for (idx = 0; idx < pin->nr_pages; idx++) {
+		if (is_kernel || !pin->pages[idx])
+			continue;
+		if (dirty)
+			SetPageDirty(pin->pages[idx]);
+		put_page(pin->pages[idx]);
+	}
+
+	scif_free(pin->pages, pin->nr_pages * sizeof(*pin->pages));
+	scif_free(pin, sizeof(*pin));
+	return 0;
+}
+
+/*
+ * scif_create_window:
+ * @ep: end point
+ * @nr_pages: number of pages
+ * @offset: registration offset
+ * @temp: true if a temporary window is being created
+ *
+ * Allocate a self registration window together with its DMA address and
+ * page count arrays (@nr_pages entries each). Returns the window, or NULL
+ * if any of the allocations fails.
+ */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp)
+{
+	struct scif_window *win;
+
+	might_sleep();
+	win = scif_zalloc(sizeof(*win));
+	if (!win)
+		return NULL;
+
+	win->dma_addr = scif_zalloc(nr_pages * sizeof(*win->dma_addr));
+	if (!win->dma_addr)
+		goto free_arrays;
+
+	win->num_pages = scif_zalloc(nr_pages * sizeof(*win->num_pages));
+	if (!win->num_pages)
+		goto free_arrays;
+
+	/* Identity of the window */
+	win->magic = SCIFEP_MAGIC;
+	win->type = SCIF_WINDOW_SELF;
+	win->ep = (u64)ep;
+	win->offset = offset;
+	win->temp = temp;
+	/* Both the register and unregister state start out idle */
+	win->reg_state = OP_IDLE;
+	win->unreg_state = OP_IDLE;
+	init_waitqueue_head(&win->regwq);
+	init_waitqueue_head(&win->unregwq);
+	INIT_LIST_HEAD(&win->list);
+	return win;
+
+free_arrays:
+	/* dma_addr is still NULL here when its own allocation failed */
+	scif_free(win->dma_addr, nr_pages * sizeof(*win->dma_addr));
+	scif_free(win, sizeof(*win));
+	return NULL;
+}
+
+/**
+ * scif_destroy_incomplete_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Tear down a self window whose allocation request may still be pending:
+ * wait until the request leaves OP_IN_PROGRESS (or the wait times out and
+ * scifdev_alive() fails), send SCIF_FREE_VIRT if the request completed,
+ * then release the window offset and the window memory.
+ */
+static void scif_destroy_incomplete_window(struct scif_endpt *ep,
+					   struct scif_window *window)
+{
+	int err;
+	int nr_pages = window->nr_pages;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	struct scifmsg msg;
+
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	if (!err && scifdev_alive(ep)) /* timed out but still alive: wait on */
+		goto retry;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	if (alloc->state == OP_COMPLETED) { /* granted: ask the peer to free */
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		_scif_nodeqp_send(ep->remote_dev, &msg);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_unmap_window:
+ * @remote_dev: SCIF remote device
+ * @window: registration window
+ *
+ * Undo the DMA mappings of a registered self window: either the single
+ * scatterlist mapping (IOMMU enabled, non-loopback device) or the
+ * individual mappings of each contiguous chunk.
+ */
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int chunk;
+
+	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
+		/* Scatterlist case; nothing to do if no table is attached */
+		if (!window->st)
+			return;
+		dma_unmap_sg(&remote_dev->sdev->dev,
+			     window->st->sgl, window->st->nents,
+			     DMA_BIDIRECTIONAL);
+		sg_free_table(window->st);
+		kfree(window->st);
+		window->st = NULL;
+		return;
+	}
+
+	for (chunk = 0; chunk < window->nr_contig_chunks; chunk++) {
+		if (!window->dma_addr[chunk])
+			continue;
+		scif_unmap_single(window->dma_addr[chunk], remote_dev,
+				  window->num_pages[chunk] << PAGE_SHIFT);
+		/* Cleared so that the chunk is not unmapped twice */
+		window->dma_addr[chunk] = 0x0;
+	}
+}
+
+/* Take a reference on current's mm, but only when ulimit checks are on */
+static inline struct mm_struct *__scif_acquire_mm(void)
+{
+	return scif_ulimit_check ? get_task_mm(current) : NULL;
+}
+
+/* Drop an mm reference; a NULL @mm is accepted and ignored */
+static inline void __scif_release_mm(struct mm_struct *mm)
+{
+	if (!mm)
+		return;
+	mmput(mm);
+}
+
+/*
+ * Subtract @nr_pages from mm->pinned_vm under mmap_sem. With @try_lock set
+ * the semaphore is only tried; -1 is returned if it cannot be taken.
+ * Returns 0 otherwise, including when there is nothing to account.
+ */
+static inline int
+__scif_dec_pinned_vm_lock(struct mm_struct *mm,
+			  int nr_pages, bool try_lock)
+{
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+
+	if (!try_lock) {
+		down_write(&mm->mmap_sem);
+	} else if (!down_write_trylock(&mm->mmap_sem)) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d err\n", __func__, __LINE__);
+		return -1;
+	}
+
+	mm->pinned_vm -= nr_pages;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
+/*
+ * Add @nr_pages to mm->pinned_vm unless that would exceed RLIMIT_MEMLOCK
+ * for a task without CAP_IPC_LOCK, in which case -ENOMEM is returned and
+ * the counter is left alone. Returns 0 when there is nothing to account.
+ */
+static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
+					     int nr_pages)
+{
+	unsigned long total, limit;
+
+	if (!mm || !nr_pages || !scif_ulimit_check)
+		return 0;
+
+	total = nr_pages;
+	total += mm->pinned_vm;
+	limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (total > limit && !capable(CAP_IPC_LOCK)) {
+		dev_err(scif_info.mdev.this_device,
+			"locked(%lu) > lock_limit(%lu)\n",
+			total, limit);
+		return -ENOMEM;
+	}
+
+	mm->pinned_vm = total;
+	return 0;
+}
+
+/**
+ * scif_destroy_window:
+ * @ep: end point
+ * @window: registration window
+ *
+ * Deallocate resources for self window: undo the pinned_vm accounting for
+ * non-temporary windows, free the window offset, unmap the window, drop
+ * the window's reference on its pinned pages and free the window itself.
+ * Always returns 0.
+ */
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
+{
+	int j;
+	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
+	int nr_pages = window->nr_pages;
+
+	might_sleep();
+	if (!window->temp && window->mm) {
+		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
+		__scif_release_mm(window->mm);
+		window->mm = NULL;
+	}
+
+	scif_free_window_offset(ep, window, window->offset);
+	scif_unmap_window(ep->remote_dev, window);
+	/*
+	 * Decrement references for this set of pinned pages from
+	 * this window.
+	 */
+	j = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (j < 0)
+		dev_err(scif_info.mdev.this_device,
+			"%s %d incorrect ref count %d\n",
+			__func__, __LINE__, j);
+	/*
+	 * If the ref count for pinned_pages is zero then someone
+	 * has already called scif_unpin_pages() for it and we should
+	 * destroy the page cache.
+	 */
+	if (!j)
+		scif_destroy_pinned_pages(window->pinned_pages);
+	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
+	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
+	window->magic = 0; /* cleared before the window memory is freed */
+	scif_free(window, sizeof(*window));
+	return 0;
+}
+
+/**
+ * scif_create_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Allocate and prepare lookup entries for the remote
+ * end to copy over the physical addresses: the window structure itself is
+ * mapped, two coherent lookup arrays are allocated, and every page backing
+ * window->dma_addr and window->num_pages is mapped into them.
+ * Returns 0 on success and appropriate errno on failure. Nothing that was
+ * set up is undone here on failure.
+ */
+static int scif_create_remote_lookup(struct scif_dev *remote_dev,
+				     struct scif_window *window)
+{
+	int i, j, err = 0;
+	int nr_pages = window->nr_pages;
+	bool vmalloc_dma_phys, vmalloc_num_pages;
+
+	might_sleep();
+	/* Map window */
+	err = scif_map_single(&window->mapped_offset,
+			      window, remote_dev, sizeof(*window));
+	if (err)
+		goto error_window;
+
+	/* Compute the number of lookup entries. 21 == 2MB Shift */
+	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+					((2) * 1024 * 1024)) >> 21;
+
+	window->dma_addr_lookup.lookup =
+		scif_alloc_coherent(&window->dma_addr_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->dma_addr_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->dma_addr_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	window->num_pages_lookup.lookup =
+		scif_alloc_coherent(&window->num_pages_lookup.offset,
+				    remote_dev, window->nr_lookup *
+				    sizeof(*window->num_pages_lookup.lookup),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!window->num_pages_lookup.lookup) {
+		err = -ENOMEM;
+		goto error_window;
+	}
+
+	/* The two arrays are allocated separately; classify each on its own */
+	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
+	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
+
+	/* Now map each of the pages containing physical addresses */
+	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
+		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
+				    vmalloc_dma_phys ?
+				    vmalloc_to_page(&window->dma_addr[i]) :
+				    virt_to_page(&window->dma_addr[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+		/* num_pages[] must be resolved with its own vmalloc flag */
+		err = scif_map_page(&window->num_pages_lookup.lookup[j],
+				    vmalloc_num_pages ?
+				    vmalloc_to_page(&window->num_pages[i]) :
+				    virt_to_page(&window->num_pages[i]),
+				    remote_dev);
+		if (err)
+			goto error_window;
+	}
+	return 0;
+error_window:
+	return err;
+}
+
+/**
+ * scif_destroy_remote_lookup:
+ * @remote_dev: SCIF remote device
+ * @window: remote window
+ *
+ * Release the lookup state of @window: unmap every populated lookup slot,
+ * free both coherent lookup arrays and unmap the window structure. Does
+ * nothing when window->nr_lookup is zero, and zeroes it when done.
+ */
+static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
+				       struct scif_window *window)
+{
+	struct scif_rma_lookup *dma_lup = &window->dma_addr_lookup;
+	struct scif_rma_lookup *np_lup = &window->num_pages_lookup;
+	int page, slot;
+
+	if (!window->nr_lookup)
+		return;
+
+	for (page = 0, slot = 0; page < window->nr_pages;
+	     page += SCIF_NR_ADDR_IN_PAGE, slot++) {
+		if (dma_lup->lookup && dma_lup->lookup[slot])
+			scif_unmap_single(dma_lup->lookup[slot], remote_dev,
+					  PAGE_SIZE);
+		if (np_lup->lookup && np_lup->lookup[slot])
+			scif_unmap_single(np_lup->lookup[slot], remote_dev,
+					  PAGE_SIZE);
+	}
+
+	if (dma_lup->lookup)
+		scif_free_coherent(dma_lup->lookup, dma_lup->offset,
+				   remote_dev, window->nr_lookup *
+				   sizeof(*dma_lup->lookup));
+	if (np_lup->lookup)
+		scif_free_coherent(np_lup->lookup, np_lup->offset,
+				   remote_dev, window->nr_lookup *
+				   sizeof(*np_lup->lookup));
+	if (window->mapped_offset)
+		scif_unmap_single(window->mapped_offset,
+				  remote_dev, sizeof(*window));
+
+	/* Marks the lookup state as gone for any later call */
+	window->nr_lookup = 0;
+}
+
+/**
+ * scif_create_remote_window:
+ * @scifdev: SCIF remote device
+ * @nr_pages: number of pages in window
+ *
+ * Allocate and prepare a remote registration window, including the lookup
+ * entries created by scif_create_remote_lookup().
+ * Returns the window, or NULL on failure.
+ */
+static struct scif_window *
+scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
+{
+	struct scif_window *window;
+
+	might_sleep();
+	window = scif_zalloc(sizeof(*window));
+	if (!window)
+		goto error_ret;
+
+	window->magic = SCIFEP_MAGIC;
+	window->nr_pages = nr_pages;
+
+	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
+	if (!window->dma_addr)
+		goto error_window;
+
+	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
+	if (!window->num_pages)
+		goto error_window;
+
+	if (scif_create_remote_lookup(scifdev, window))
+		goto error_window;
+
+	window->type = SCIF_WINDOW_PEER;
+	window->unreg_state = OP_IDLE;
+	INIT_LIST_HEAD(&window->list);
+	return window;
+error_window:
+	/*
+	 * scif_create_remote_lookup() may have failed half way, leaving
+	 * mappings and coherent buffers behind. Release them before the
+	 * window goes away; this is a no-op while nr_lookup is still zero.
+	 */
+	scif_destroy_remote_lookup(scifdev, window);
+	scif_destroy_remote_window(window);
+error_ret:
+	return NULL;
+}
+
+/**
+ * scif_destroy_remote_window:
+ * @window: remote registration window
+ *
+ * Deallocate resources for remote window: the dma_addr and num_pages
+ * arrays and the window itself. Lookup entries are not released here.
+ */
+void
+scif_destroy_remote_window(struct scif_window *window)
+{
+	scif_free(window->dma_addr, window->nr_pages *
+		  sizeof(*window->dma_addr));
+	scif_free(window->num_pages, window->nr_pages *
+		  sizeof(*window->num_pages));
+	window->magic = 0; /* cleared before the window memory is freed */
+	scif_free(window, sizeof(*window));
+}
+
+/**
+ * scif_iommu_map: create DMA mappings if the IOMMU is enabled
+ * @remote_dev: SCIF remote device
+ * @window: registration window whose pinned pages are to be mapped
+ *
+ * Map the physical pages using dma_map_sg(..) and then detect the number
+ * of contiguous DMA mappings allocated. Each contiguous run is recorded as
+ * one entry in window->dma_addr[] / window->num_pages[].
+ * Returns 0 on success, -ENOMEM or the sg_alloc_table() error otherwise.
+ */
+static int scif_iommu_map(struct scif_dev *remote_dev,
+			  struct scif_window *window)
+{
+	struct scatterlist *sg;
+	int i, err;
+	scif_pinned_pages_t pin = window->pinned_pages;
+
+	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
+	if (!window->st)
+		return -ENOMEM;
+
+	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
+	if (err)
+		return err;
+
+	for_each_sg(window->st->sgl, sg, window->st->nents, i)
+		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
+
+	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
+			 window->st->nents, DMA_BIDIRECTIONAL);
+	if (!err)
+		return -ENOMEM;
+	/* Detect contiguous ranges of DMA mappings */
+	sg = window->st->sgl;
+	for (i = 0; sg; i++) {
+		dma_addr_t last_da;
+
+		window->dma_addr[i] = sg_dma_address(sg);
+		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
+		last_da = sg_dma_address(sg) + sg_dma_len(sg);
+		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
+			window->num_pages[i] +=
+				(sg_dma_len(sg) >> PAGE_SHIFT);
+			/*
+			 * Advance to the end of the segment just merged. The
+			 * chunk start must not be used as the base here, or
+			 * a third contiguous segment would never match.
+			 */
+			last_da = sg_dma_address(sg) +
+				sg_dma_len(sg);
+		}
+		window->nr_contig_chunks++;
+	}
+	return 0;
+}
+
+/**
+ * scif_map_window:
+ * @remote_dev: SCIF remote device
+ * @window: self registration window
+ *
+ * Map pages of a window into the aperture/PCI.
+ * Also determine addresses required for DMA: physically contiguous pages
+ * are grouped into chunks, one dma_addr[]/num_pages[] entry per chunk.
+ * Returns 0 on success or the error of the failing mapping call.
+ */
+int
+scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
+{
+	int i, j, k, err = 0, nr_contig_pages;
+	scif_pinned_pages_t pin;
+	phys_addr_t phys_prev, phys_curr;
+
+	might_sleep();
+
+	pin = window->pinned_pages;
+
+	/*
+	 * Use the same predicate as scif_unmap_window() so that a window is
+	 * always unmapped through the path it was mapped with.
+	 */
+	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev))
+		return scif_iommu_map(remote_dev, window);
+
+	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
+		phys_prev = page_to_phys(pin->pages[i]);
+		nr_contig_pages = 1;
+
+		/* Detect physically contiguous chunks */
+		for (k = i + 1; k < window->nr_pages; k++) {
+			phys_curr = page_to_phys(pin->pages[k]);
+			if (phys_curr != (phys_prev + PAGE_SIZE))
+				break;
+			phys_prev = phys_curr;
+			nr_contig_pages++;
+		}
+		window->num_pages[j] = nr_contig_pages;
+		window->nr_contig_chunks++;
+		if (scif_is_mgmt_node()) {
+			/*
+			 * Management node has to deal with SMPT on X100 and
+			 * hence the DMA mapping is required
+			 */
+			err = scif_map_single(&window->dma_addr[j],
+					      phys_to_virt(page_to_phys(
+							   pin->pages[i])),
+					      remote_dev,
+					      nr_contig_pages << PAGE_SHIFT);
+			if (err)
+				return err;
+		} else {
+			window->dma_addr[j] = page_to_phys(pin->pages[i]);
+		}
+	}
+	return err;
+}
+
+/**
+ * scif_send_scif_unregister:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_UNREGISTER message carrying the peer's window address and
+ * this window's address. Returns the result of scif_nodeqp_send().
+ */
+static int scif_send_scif_unregister(struct scif_endpt *ep,
+				     struct scif_window *window)
+{
+	struct scifmsg msg;
+
+	msg.uop = SCIF_UNREGISTER;
+	msg.src = ep->port;
+	msg.payload[0] = window->alloc_handle.vaddr;
+	msg.payload[1] = (u64)window;
+	return scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_unregister_window:
+ * @window: self registration window
+ *
+ * Send an unregistration request and wait for a response. Once the window
+ * has no references left it is removed from its list and queued for
+ * cleanup. Returns 0 on success, -ENXIO if an unregistration is already in
+ * progress, -ENODEV if the wait timed out with scifdev_alive() failing, or
+ * the error from sending the request.
+ *
+ * NOTE(review): the function drops and re-takes ep->rma_info.rma_lock, so
+ * it presumably has to be entered with that mutex held; confirm against
+ * the callers.
+ */
+int scif_unregister_window(struct scif_window *window)
+{
+	int err = 0;
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	bool send_msg = false;
+
+	might_sleep();
+	switch (window->unreg_state) {
+	case OP_IDLE:
+	{
+		window->unreg_state = OP_IN_PROGRESS;
+		send_msg = true;
+		/* fall through */
+	}
+	case OP_IN_PROGRESS:
+	{
+		scif_get_window(window, 1);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if (send_msg) {
+			err = scif_send_scif_unregister(ep, window);
+			if (err) {
+				window->unreg_state = OP_COMPLETED;
+				goto done;
+			}
+		} else {
+			/* Return ENXIO since unregistration is in progress */
+			mutex_lock(&ep->rma_info.rma_lock);
+			return -ENXIO;
+		}
+retry:
+		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
+		err = wait_event_timeout(window->unregwq,
+					 window->unreg_state != OP_IN_PROGRESS,
+					 SCIF_NODE_ALIVE_TIMEOUT);
+		if (!err && scifdev_alive(ep))
+			goto retry;
+		if (!err) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n", __func__, __LINE__, err);
+		}
+		if (err > 0) /* positive means the wait condition was met */
+			err = 0;
+done:
+		mutex_lock(&ep->rma_info.rma_lock);
+		scif_put_window(window, 1);
+		break;
+	}
+	case OP_FAILED:
+	{
+		if (!scifdev_alive(ep)) {
+			err = -ENODEV;
+			window->unreg_state = OP_COMPLETED;
+		}
+		break;
+	}
+	case OP_COMPLETED:
+		break;
+	default:
+		err = -ENODEV;
+	}
+
+	if (window->unreg_state == OP_COMPLETED && window->ref_count)
+		scif_put_window(window, window->nr_pages);
+
+	if (!window->ref_count) {
+		atomic_inc(&ep->rma_info.tw_refcount);
+		list_del_init(&window->list);
+		scif_free_window_offset(ep, window, window->offset);
+		mutex_unlock(&ep->rma_info.rma_lock);
+		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
+		    scifdev_alive(ep)) {
+			scif_drain_dma_intr(ep->remote_dev->sdev,
+					    ep->rma_info.dma_chan);
+		} else {
+			if (!__scif_dec_pinned_vm_lock(window->mm,
+						       window->nr_pages, 1)) {
+				__scif_release_mm(window->mm);
+				window->mm = NULL;
+			}
+		}
+		scif_queue_for_cleanup(window, &scif_info.rma);
+		mutex_lock(&ep->rma_info.rma_lock);
+	}
+	return err;
+}
+
+/**
+ * scif_send_alloc_request:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a remote window allocation request. The window's alloc handle is
+ * put into OP_IN_PROGRESS and its wait queue initialized before sending.
+ * Returns the result of _scif_nodeqp_send().
+ *
+ * NOTE(review): msg.src and msg.payload[0] are not assigned before the
+ * message is sent; confirm that the receiver never reads them.
+ */
+static int scif_send_alloc_request(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+
+	/* Set up the Alloc Handle */
+	alloc->state = OP_IN_PROGRESS;
+	init_waitqueue_head(&alloc->allocwq);
+
+	/* Send out an allocation request */
+	msg.uop = SCIF_ALLOC_REQ;
+	msg.payload[1] = window->nr_pages;
+	msg.payload[2] = (u64)&window->alloc_handle;
+	return _scif_nodeqp_send(ep->remote_dev, &msg);
+}
+
+/**
+ * scif_prep_remote_window:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Map the local window, wait for the response to the remote window
+ * allocation request, and prepare the remote window by copying over the
+ * DMA address and page count lists. Returns 0 on success, -ENODEV if the
+ * wait timed out with scifdev_alive() failing, -ENOMEM if the remote end
+ * rejected the request, or the result of the SCIF_FREE_VIRT handling when
+ * the local mapping failed.
+ */
+static int scif_prep_remote_window(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	struct scifmsg msg;
+	struct scif_window *remote_window;
+	struct scif_allocmsg *alloc = &window->alloc_handle;
+	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
+	int i = 0, j = 0;
+	int nr_contig_chunks, loop_nr_contig_chunks;
+	int remaining_nr_contig_chunks, nr_lookup;
+	int err, map_err;
+
+	map_err = scif_map_window(ep->remote_dev, window);
+	if (map_err) /* only logged here; acted upon after the wait below */
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d map_err %d\n", __func__, __LINE__, map_err);
+	remaining_nr_contig_chunks = window->nr_contig_chunks;
+	nr_contig_chunks = window->nr_contig_chunks;
+retry:
+	/* Wait for a SCIF_ALLOC_GNT/REJ message */
+	err = wait_event_timeout(alloc->allocwq,
+				 alloc->state != OP_IN_PROGRESS,
+				 SCIF_NODE_ALIVE_TIMEOUT);
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Synchronize with the thread waking up allocwq */
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!err && scifdev_alive(ep))
+		goto retry;
+
+	if (!err)
+		err = -ENODEV;
+
+	if (err > 0)
+		err = 0;
+	else
+		return err;
+
+	/* Bail out. The remote end rejected this request */
+	if (alloc->state == OP_FAILED)
+		return -ENOMEM;
+
+	if (map_err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, map_err);
+		msg.uop = SCIF_FREE_VIRT;
+		msg.src = ep->port;
+		msg.payload[0] = ep->remote_ep;
+		msg.payload[1] = window->alloc_handle.vaddr;
+		msg.payload[2] = (u64)window;
+		msg.payload[3] = SCIF_REGISTER;
+		spin_lock(&ep->lock);
+		if (ep->state == SCIFEP_CONNECTED)
+			err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		else
+			err = -ENOTCONN;
+		spin_unlock(&ep->lock);
+		return err;
+	}
+
+	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
+				     ep->remote_dev);
+
+	/* One lookup entry per SCIF_NR_ADDR_IN_PAGE contiguous chunks */
+	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
+			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);
+
+	dma_phys_lookup =
+		scif_ioremap(remote_window->dma_addr_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->dma_addr_lookup.lookup),
+			     ep->remote_dev);
+	num_pages_lookup =
+		scif_ioremap(remote_window->num_pages_lookup.offset,
+			     nr_lookup *
+			     sizeof(*remote_window->num_pages_lookup.lookup),
+			     ep->remote_dev);
+
+	while (remaining_nr_contig_chunks) {
+		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
+					      (int)SCIF_NR_ADDR_IN_PAGE);
+		/* #1/2 - Copy physical addresses over to the remote side */
+
+		/* #2/2 - Copy DMA addresses (addresses that are fed into the
+		 * DMA engine) We transfer bus addresses which are then
+		 * converted into a MIC physical address on the remote
+		 * side if it is a MIC, if the remote node is a mgmt node we
+		 * transfer the MIC physical address
+		 */
+		tmp = scif_ioremap(dma_phys_lookup[j],
+				   loop_nr_contig_chunks *
+				   sizeof(*window->dma_addr),
+				   ep->remote_dev);
+		tmp1 = scif_ioremap(num_pages_lookup[j],
+				    loop_nr_contig_chunks *
+				    sizeof(*window->num_pages),
+				    ep->remote_dev);
+		if (scif_is_mgmt_node()) {
+			memcpy_toio((void __force __iomem *)tmp,
+				    &window->dma_addr[i], loop_nr_contig_chunks
+				    * sizeof(*window->dma_addr));
+			memcpy_toio((void __force __iomem *)tmp1,
+				    &window->num_pages[i], loop_nr_contig_chunks
+				    * sizeof(*window->num_pages));
+		} else {
+			if (scifdev_is_p2p(ep->remote_dev)) {
+				/*
+				 * add remote node's base address for this node
+				 * to convert it into a MIC address
+				 */
+				int m;
+				dma_addr_t dma_addr;
+
+				for (m = 0; m < loop_nr_contig_chunks; m++) {
+					dma_addr = window->dma_addr[i + m] +
+						ep->remote_dev->base_addr;
+					writeq(dma_addr,
+					       (void __force __iomem *)&tmp[m]);
+				}
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks
+					    * sizeof(*window->num_pages));
+			} else {
+				/* Mgmt node or loopback - transfer DMA
+				 * addresses as is, this is the same as a
+				 * MIC physical address (we use the dma_addr
+				 * and not the phys_addr array since the
+				 * phys_addr is only setup if there is a mmap()
+				 * request from the mgmt node)
+				 */
+				memcpy_toio((void __force __iomem *)tmp,
+					    &window->dma_addr[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->dma_addr));
+				memcpy_toio((void __force __iomem *)tmp1,
+					    &window->num_pages[i],
+					    loop_nr_contig_chunks *
+					    sizeof(*window->num_pages));
+			}
+		}
+		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
+		i += loop_nr_contig_chunks; /* next chunk to copy */
+		j++; /* next lookup entry */
+		scif_iounmap(tmp, loop_nr_contig_chunks *
+			     sizeof(*window->dma_addr), ep->remote_dev);
+		scif_iounmap(tmp1, loop_nr_contig_chunks *
+			     sizeof(*window->num_pages), ep->remote_dev);
+	}
+
+	/* Prepare the remote window for the peer */
+	remote_window->peer_window = (u64)window;
+	remote_window->offset = window->offset;
+	remote_window->prot = window->prot;
+	remote_window->nr_contig_chunks = nr_contig_chunks;
+	remote_window->ep = ep->remote_ep;
+	scif_iounmap(num_pages_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->num_pages_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(dma_phys_lookup,
+		     nr_lookup *
+		     sizeof(*remote_window->dma_addr_lookup.lookup),
+		     ep->remote_dev);
+	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
+	window->peer_window = alloc->vaddr;
+	return err;
+}
+
+/**
+ * scif_send_scif_register:
+ * @ep: end point
+ * @window: self registration window
+ *
+ * Send a SCIF_REGISTER message if EP is connected and wait for a
+ * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
+ * message so that the peer can free its remote window allocated earlier.
+ * Returns 0 on success, -ENOTCONN if the endpoint is not connected or the
+ * registration failed, -ENODEV if the wait timed out with scifdev_alive()
+ * failing, or the error from sending the message.
+ */
+static int scif_send_scif_register(struct scif_endpt *ep,
+				   struct scif_window *window)
+{
+	int err = 0;
+	struct scifmsg msg;
+
+	msg.src = ep->port;
+	msg.payload[0] = ep->remote_ep;
+	msg.payload[1] = window->alloc_handle.vaddr;
+	msg.payload[2] = (u64)window;
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg.uop = SCIF_REGISTER;
+		window->reg_state = OP_IN_PROGRESS;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err) {
+retry:
+			/* Wait for a SCIF_REGISTER_(N)ACK message */
+			err = wait_event_timeout(window->regwq,
+						 window->reg_state !=
+						 OP_IN_PROGRESS,
+						 SCIF_NODE_ALIVE_TIMEOUT);
+			if (!err && scifdev_alive(ep))
+				goto retry;
+			err = !err ? -ENODEV : 0; /* timeout -> -ENODEV */
+			if (window->reg_state == OP_FAILED)
+				err = -ENOTCONN;
+		}
+	} else {
+		msg.uop = SCIF_FREE_VIRT;
+		msg.payload[3] = SCIF_REGISTER;
+		err = _scif_nodeqp_send(ep->remote_dev, &msg);
+		spin_unlock(&ep->lock);
+		if (!err) /* sent fine, but the registration did not happen */
+			err = -ENOTCONN;
+	}
+	return err;
+}
+
+/**
+ * scif_get_window_offset:
+ * @ep: end point descriptor
+ * @flags: flags
+ * @offset: offset hint
+ * @num_pages: number of pages
+ * @out_offset: computed offset returned by reference.
+ *
+ * Claim an offset range for this EP. With SCIF_MAP_FIXED the range at
+ * @offset is reserved and -EADDRINUSE is returned if that fails; without
+ * it a range is allocated and -ENOMEM is returned on failure. On success
+ * 0 is returned and @out_offset is written.
+ */
+int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
+			   int num_pages, s64 *out_offset)
+{
+	struct iova *range;
+
+	if (flags & SCIF_MAP_FIXED) {
+		s64 first_pfn = SCIF_IOVA_PFN(offset);
+
+		range = reserve_iova(&ep->rma_info.iovad, first_pfn,
+				     first_pfn + num_pages - 1);
+		if (!range)
+			return -EADDRINUSE;
+	} else {
+		range = alloc_iova(&ep->rma_info.iovad, num_pages,
+				   SCIF_DMA_63BIT_PFN - 1, 0);
+		if (!range)
+			return -ENOMEM;
+	}
+
+	*out_offset = (range->pfn_lo) << PAGE_SHIFT;
+	return 0;
+}
+
+/**
+ * scif_free_window_offset:
+ * @ep: end point descriptor
+ * @window: registration window, may be NULL
+ * @offset: Offset to be freed
+ *
+ * Free offset for this EP. The caller is supposed to grab
+ * the RMA mutex before calling this API. When @window is given, the
+ * offset is released at most once per window.
+ */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset)
+{
+	/* This window's offset has been released already */
+	if (window && window->offset_freed)
+		return;
+
+	free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
+	if (window)
+		window->offset_freed = true;
+}
+
+/**
+ * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
+ * @scifdev:    SCIF device the message was received from
+ * @msg:        Interrupt message
+ *
+ * Remote side is requesting a memory allocation. A remote window of
+ * msg->payload[1] pages is created and granted with SCIF_ALLOC_GNT, or the
+ * request is rejected with SCIF_ALLOC_REJ if the window cannot be created.
+ */
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	int err;
+	struct scif_window *window = NULL;
+	int nr_pages = msg->payload[1];
+
+	window = scif_create_remote_window(scifdev, nr_pages);
+	if (!window) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* The peer's allocation request is granted */
+	msg->uop = SCIF_ALLOC_GNT;
+	msg->payload[0] = (u64)window;
+	msg->payload[1] = window->mapped_offset;
+	err = scif_nodeqp_send(scifdev, msg);
+	if (err) {
+		/*
+		 * The grant never went out, so the window will not be used.
+		 * Release its lookup entries along with the window itself.
+		 */
+		scif_destroy_remote_lookup(scifdev, window);
+		scif_destroy_remote_window(window);
+	}
+	return;
+error:
+	/* The peer's allocation request is rejected */
+	dev_err(&scifdev->sdev->dev,
+		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
+		__func__, __LINE__, err, window, nr_pages);
+	msg->uop = SCIF_ALLOC_REJ;
+	scif_nodeqp_send(scifdev, msg);
+}
+
+/**
+ * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
+ * @scifdev:    SCIF device the message was received from
+ * @msg:        Interrupt message
+ *
+ * Remote side responded to a memory allocation. The alloc handle named in
+ * the message is updated under the RMA lock and its waiter is woken up.
+ */
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_allocmsg *alloc = (struct scif_allocmsg *)msg->payload[2];
+	struct scif_window *win = container_of(alloc, struct scif_window,
+					       alloc_handle);
+	struct scif_endpt *endpt = (struct scif_endpt *)win->ep;
+
+	mutex_lock(&endpt->rma_info.rma_lock);
+	alloc->vaddr = msg->payload[0];
+	alloc->phys_addr = msg->payload[1];
+	/* Anything other than a grant counts as a failure */
+	alloc->state = (msg->uop == SCIF_ALLOC_GNT) ? OP_COMPLETED : OP_FAILED;
+	wake_up(&alloc->allocwq);
+	mutex_unlock(&endpt->rma_info.rma_lock);
+}
+
+/**
+ * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
+ * @scifdev:    SCIF device the message was received from
+ * @msg:        Interrupt message
+ *
+ * Free up the remote window named in msg->payload[1], which was allocated
+ * earlier on behalf of the peer.
+ */
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *window = (struct scif_window *)msg->payload[1];
+
+	/*
+	 * The lookup entries may still exist at this point; release them
+	 * first. This is a no-op once window->nr_lookup is zero.
+	 */
+	scif_destroy_remote_lookup(scifdev, window);
+	scif_destroy_remote_window(window);
+}
+
+/*
+ * Add the aperture base to the DMA addresses of a peer window when the
+ * device is not loopback, has an aperture and its DMA addresses are not
+ * card relative. Stops at the first chunk with a zero page count.
+ */
+static void
+scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
+{
+	struct scif_hw_dev *sdev = dev->sdev;
+	phys_addr_t apt_base;
+	int chunk;
+
+	/*
+	 * Add the aperture base if the DMA address is not card relative
+	 * since the DMA addresses need to be an offset into the bar
+	 */
+	if (scifdev_self(dev) || window->type != SCIF_WINDOW_PEER ||
+	    !sdev->aper || sdev->card_rel_da)
+		return;
+
+	apt_base = sdev->aper->pa;
+	for (chunk = 0; chunk < window->nr_contig_chunks &&
+	     window->num_pages[chunk]; chunk++)
+		window->dma_addr[chunk] += apt_base;
+}
+
+/**
+ * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
+ * @scifdev:    SCIF device (not referenced in this function)
+ * @msg:        Interrupt message
+ *
+ * Update remote window list with a new registered window. If the endpoint
+ * is connected the message is answered with SCIF_REGISTER_ACK and the
+ * window is inserted; otherwise SCIF_REGISTER_NACK is sent and the window
+ * is destroyed. The lookup entries are released in both cases.
+ */
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
+	struct scif_window *window =
+		(struct scif_window *)msg->payload[1];
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	spin_lock(&ep->lock);
+	if (ep->state == SCIFEP_CONNECTED) {
+		msg->uop = SCIF_REGISTER_ACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+		scif_fixup_aper_base(ep->remote_dev, window);
+		/* No further failures expected. Insert new window */
+		scif_insert_window(window, &ep->rma_info.remote_reg_list);
+	} else {
+		msg->uop = SCIF_REGISTER_NACK;
+		scif_nodeqp_send(ep->remote_dev, msg);
+	}
+	spin_unlock(&ep->lock);
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/* free up any lookup resources now that page lists are transferred */
+	scif_destroy_remote_lookup(ep->remote_dev, window);
+	/*
+	 * We could not insert the window but we need to
+	 * destroy the window.
+	 */
+	if (msg->uop == SCIF_REGISTER_NACK)
+		scif_destroy_remote_window(window);
+}
+
+/**
+ * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
+ * @scifdev:    SCIF device, used here only for error logging
+ * @msg:        Interrupt message; payload[0] is the window to remove
+ *
+ * Remove window from remote registration list. The message is reused for
+ * the SCIF_UNREGISTER_ACK/SCIF_UNREGISTER_NACK reply.
+ */
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_rma_req req;
+	struct scif_window *window = NULL;
+	struct scif_window *recv_window =
+		(struct scif_window *)msg->payload[0];
+	struct scif_endpt *ep;
+	int del_window = 0;
+
+	ep = (struct scif_endpt *)recv_window->ep;
+	/* Look up a window covering all of recv_window in the remote list */
+	req.out_window = &window;
+	req.offset = recv_window->offset;
+	req.prot = 0;
+	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
+	req.type = SCIF_WINDOW_FULL;
+	req.head = &ep->rma_info.remote_reg_list;
+	/* The same message is sent back below, carrying the remote endpoint */
+	msg->payload[0] = ep->remote_ep;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	if (scif_query_window(&req)) {
+		dev_err(&scifdev->sdev->dev,
+			"%s %d -ENXIO\n", __func__, __LINE__);
+		msg->uop = SCIF_UNREGISTER_ACK;
+		goto error;
+	}
+	if (window) {
+		/* Drop the references accounted for the window's own pages */
+		if (window->ref_count)
+			scif_put_window(window, window->nr_pages);
+		else
+			dev_err(&scifdev->sdev->dev,
+				"%s %d ref count should be +ve\n",
+				__func__, __LINE__);
+		window->unreg_state = OP_COMPLETED;
+		if (!window->ref_count) {
+			/* Unused: unlink now, queue for cleanup after unlocking */
+			msg->uop = SCIF_UNREGISTER_ACK;
+			atomic_inc(&ep->rma_info.tw_refcount);
+			ep->rma_info.async_list_del = 1;
+			list_del_init(&window->list);
+			del_window = 1;
+		} else {
+			/* NACK! There are valid references to this window */
+			msg->uop = SCIF_UNREGISTER_NACK;
+		}
+	} else {
+		/* The window did not make its way to the list at all. ACK */
+		msg->uop = SCIF_UNREGISTER_ACK;
+		scif_destroy_remote_window(recv_window);
+	}
+error:
+	mutex_unlock(&ep->rma_info.rma_lock);
+	/* Drain the endpoint's DMA channel before the reply is sent */
+	if (del_window)
+		scif_drain_dma_intr(ep->remote_dev->sdev,
+				    ep->rma_info.dma_chan);
+	scif_nodeqp_send(ep->remote_dev, msg);
+	if (del_window)
+		scif_queue_for_cleanup(window, &scif_info.rma);
+}
+
+/**
+ * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message; payload[2] holds the window pointer
+ *
+ * Mark the registration as completed and wake up whoever waits on the
+ * window's registration wait queue.
+ */
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *win = (struct scif_window *)msg->payload[2];
+	struct scif_endpt *endpt = (struct scif_endpt *)win->ep;
+	struct mutex *lock = &endpt->rma_info.rma_lock;
+
+	mutex_lock(lock);
+	win->reg_state = OP_COMPLETED;
+	wake_up(&win->regwq);
+	mutex_unlock(lock);
+}
+
+/**
+ * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message; payload[2] holds the window pointer
+ *
+ * Mark the registration as failed and wake up whoever waits on the
+ * window's registration wait queue.
+ */
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *win = (struct scif_window *)msg->payload[2];
+	struct scif_endpt *endpt = (struct scif_endpt *)win->ep;
+	struct mutex *lock = &endpt->rma_info.rma_lock;
+
+	mutex_lock(lock);
+	win->reg_state = OP_FAILED;
+	wake_up(&win->regwq);
+	mutex_unlock(lock);
+}
+
+/**
+ * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message; payload[1] holds the window pointer
+ *
+ * Mark the unregistration as completed and wake up whoever waits on the
+ * window's unregistration wait queue.
+ */
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *win = (struct scif_window *)msg->payload[1];
+	struct scif_endpt *endpt = (struct scif_endpt *)win->ep;
+	struct mutex *lock = &endpt->rma_info.rma_lock;
+
+	mutex_lock(lock);
+	win->unreg_state = OP_COMPLETED;
+	wake_up(&win->unregwq);
+	mutex_unlock(lock);
+}
+
+/**
+ * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
+ * @scifdev:    SCIF device (unused by this handler)
+ * @msg:        Interrupt message; payload[1] holds the window pointer
+ *
+ * Mark the unregistration as failed, i.e. it cannot be completed
+ * immediately, and wake up whoever waits on the window's unregistration
+ * wait queue.
+ */
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
+{
+	struct scif_window *win = (struct scif_window *)msg->payload[1];
+	struct scif_endpt *endpt = (struct scif_endpt *)win->ep;
+	struct mutex *lock = &endpt->rma_info.rma_lock;
+
+	mutex_lock(lock);
+	win->unreg_state = OP_FAILED;
+	wake_up(&win->unregwq);
+	mutex_unlock(lock);
+}
+
+/* Drop the references on the pages pinned so far and clear their slots */
+static void scif_rollback_pinned_pages(struct scif_pinned_pages *pinned_pages)
+{
+	s64 i;
+
+	for (i = 0; i < pinned_pages->nr_pages; i++) {
+		if (!pinned_pages->pages[i])
+			continue;
+		put_page(pinned_pages->pages[i]);
+		/*
+		 * Forget the page. The error paths restore nr_pages to the
+		 * full count before cleanup, so a pointer left behind here
+		 * could otherwise be mistaken for a page that is still pinned
+		 * if the retry pins fewer pages than this attempt did.
+		 */
+		pinned_pages->pages[i] = NULL;
+	}
+}
+
+/*
+ * __scif_pin_pages - pin a page aligned range and describe it in a
+ *		      scif_pinned_pages object
+ * @addr: start of the range; must be page aligned
+ * @len: length in bytes; must be non zero and page aligned
+ * @out_prot: in: requested SCIF_PROT_* bits, out: protection actually used
+ *	      (for user memory SCIF_PROT_WRITE is added when the pages can
+ *	      be pinned writable)
+ * @map_flags: SCIF_MAP_KERNEL for kernel addresses, SCIF_MAP_ULIMIT to
+ *	       account the pages through __scif_check_inc_pinned_vm()
+ * @pages: out: the new pinned pages object, or NULL if pinning failed
+ *	   after the object was allocated
+ *
+ * Return: 0 on success, -EINVAL for bad arguments, -ENOMEM if the pinned
+ * pages object cannot be allocated, -EFAULT if fewer pages than requested
+ * could be pinned, or the error from __scif_check_inc_pinned_vm().
+ */
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+		     int map_flags, scif_pinned_pages_t *pages)
+{
+	struct scif_pinned_pages *pinned_pages;
+	int nr_pages, err = 0, i;
+	bool vmalloc_addr = false;
+	bool try_upgrade = false;
+	int prot = *out_prot;
+	int ulimit = 0;
+	struct mm_struct *mm = NULL;
+
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
+		return -EINVAL;
+	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len ||
+	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
+		return -EINVAL;
+
+	might_sleep();
+
+	nr_pages = len >> PAGE_SHIFT;
+
+	/* Allocate a set of pinned pages */
+	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
+	if (!pinned_pages)
+		return -ENOMEM;
+
+	if (map_flags & SCIF_MAP_KERNEL) {
+		/* Kernel memory: only translate addresses, no references taken */
+		if (is_vmalloc_addr(addr))
+			vmalloc_addr = true;
+
+		for (i = 0; i < nr_pages; i++) {
+			if (vmalloc_addr)
+				pinned_pages->pages[i] =
+					vmalloc_to_page(addr + (i * PAGE_SIZE));
+			else
+				pinned_pages->pages[i] =
+					virt_to_page(addr + (i * PAGE_SIZE));
+		}
+		pinned_pages->nr_pages = nr_pages;
+		pinned_pages->map_flags = SCIF_MAP_KERNEL;
+	} else {
+		/*
+		 * SCIF supports registration caching. If a registration has
+		 * been requested with read only permissions, then we try
+		 * to pin the pages with RW permissions so that a subsequent
+		 * transfer with RW permission can hit the cache instead of
+		 * invalidating it. If the upgrade fails with RW then we
+		 * revert back to R permission and retry
+		 */
+		if (prot == SCIF_PROT_READ)
+			try_upgrade = true;
+		prot |= SCIF_PROT_WRITE;
+retry:
+		mm = current->mm;
+		down_write(&mm->mmap_sem);
+		if (ulimit) {
+			err = __scif_check_inc_pinned_vm(mm, nr_pages);
+			if (err) {
+				up_write(&mm->mmap_sem);
+				pinned_pages->nr_pages = 0;
+				goto error_unmap;
+			}
+		}
+
+		pinned_pages->nr_pages = get_user_pages(
+				current,
+				mm,
+				(u64)addr,
+				nr_pages,
+				!!(prot & SCIF_PROT_WRITE),
+				0,
+				pinned_pages->pages,
+				NULL);
+		up_write(&mm->mmap_sem);
+		if (nr_pages != pinned_pages->nr_pages) {
+			if (try_upgrade) {
+				if (ulimit)
+					__scif_dec_pinned_vm_lock(mm,
+								  nr_pages, 0);
+				/* Roll back any pinned pages */
+				scif_rollback_pinned_pages(pinned_pages);
+				prot &= ~SCIF_PROT_WRITE;
+				try_upgrade = false;
+				goto retry;
+			}
+		}
+		pinned_pages->map_flags = 0;
+	}
+
+	/* Also true when get_user_pages() returned a negative error code */
+	if (pinned_pages->nr_pages < nr_pages) {
+		err = -EFAULT;
+		pinned_pages->nr_pages = nr_pages;
+		goto dec_pinned;
+	}
+
+	*out_prot = prot;
+	atomic_set(&pinned_pages->ref_count, 1);
+	*pages = pinned_pages;
+	return err;
+dec_pinned:
+	if (ulimit)
+		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
+	/* Something went wrong! Rollback */
+error_unmap:
+	/* Hand the full page count to the destructor */
+	pinned_pages->nr_pages = nr_pages;
+	scif_destroy_pinned_pages(pinned_pages);
+	*pages = NULL;
+	dev_dbg(scif_info.mdev.this_device,
+		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
+	return err;
+}
+
+/*
+ * Exported wrapper around __scif_pin_pages(). @prot is passed by value, so
+ * a protection change made by __scif_pin_pages() is not reported back to
+ * the caller. The return value is that of __scif_pin_pages().
+ */
+int scif_pin_pages(void *addr, size_t len, int prot,
+		   int map_flags, scif_pinned_pages_t *pages)
+{
+	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
+}
+EXPORT_SYMBOL_GPL(scif_pin_pages);
+
+/*
+ * scif_unpin_pages - drop one reference on a set of pinned pages
+ * @pinned_pages: object obtained from scif_pin_pages()
+ *
+ * Return: -EINVAL for a NULL or corrupt object or if the reference count
+ * drops below zero, otherwise 0 or the result of destroying the pages.
+ */
+int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
+{
+	int refs;
+
+	if (!pinned_pages)
+		return -EINVAL;
+	if (pinned_pages->magic != SCIFEP_MAGIC)
+		return -EINVAL;
+
+	refs = atomic_sub_return(1, &pinned_pages->ref_count);
+	if (refs < 0) {
+		dev_err(scif_info.mdev.this_device,
+			"%s %d scif_unpin_pages called without pinning? rc %d\n",
+			__func__, __LINE__, refs);
+		return -EINVAL;
+	}
+	/*
+	 * A positive count means a registered window is still backed by
+	 * these pages; they are destroyed once all such windows are
+	 * unregistered.
+	 */
+	if (refs > 0)
+		return 0;
+
+	/* Last reference gone: release the pinned pages now */
+	return scif_destroy_pinned_pages(pinned_pages);
+}
+EXPORT_SYMBOL_GPL(scif_unpin_pages);
+
+/* Insert @window into the endpoint's reg_list while holding rma_lock */
+static inline void
+scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
+{
+	mutex_lock(&ep->rma_info.rma_lock);
+	scif_insert_window(window, &ep->rma_info.reg_list);
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_register_pinned_pages - register already pinned pages as a window
+ * @epd: endpoint descriptor
+ * @pinned_pages: pages obtained from scif_pin_pages()
+ * @offset: requested offset; validated only with SCIF_MAP_FIXED
+ * @map_flags: 0 or SCIF_MAP_FIXED
+ *
+ * On success the window holds its own reference on @pinned_pages.
+ *
+ * Return: the registered offset on success, negative errno on failure.
+ */
+off_t scif_register_pinned_pages(scif_epd_t epd,
+				 scif_pinned_pages_t pinned_pages,
+				 off_t offset, int map_flags)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	int err;
+	size_t len;
+	struct device *spdev;
+
+	/* Unsupported flags */
+	if (map_flags & ~SCIF_MAP_FIXED)
+		return -EINVAL;
+
+	/* Same sanity check as scif_unpin_pages(), before any dereference */
+	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
+		return -EINVAL;
+
+	len = pinned_pages->nr_pages << PAGE_SHIFT;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (offset + (off_t)len < offset)))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+	/*
+	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
+	 * after calling scif_unpin_pages().
+	 */
+	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
+		return -EINVAL;
+
+	/*
+	 * Compute the offset for this registration. The size argument of
+	 * scif_get_window_offset() is a page count, not a byte count.
+	 */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     pinned_pages->nr_pages, &computed_offset);
+	if (err) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		return err;
+	}
+
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, pinned_pages->nr_pages,
+				    computed_offset, false);
+	if (!window) {
+		atomic_sub(1, &pinned_pages->ref_count);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return -ENOMEM;
+	}
+
+	window->pinned_pages = pinned_pages;
+	window->nr_pages = pinned_pages->nr_pages;
+	window->prot = pinned_pages->prot;
+
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_destroy_window(ep, window);
+		return err;
+	}
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %d\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
+
+/*
+ * scif_register - pin a range of memory and register it as a window
+ * @epd: endpoint descriptor
+ * @addr: page aligned start address
+ * @len: non zero, page aligned length in bytes
+ * @offset: requested offset; validated only with SCIF_MAP_FIXED
+ * @prot: SCIF_PROT_READ and/or SCIF_PROT_WRITE
+ * @map_flags: SCIF_MAP_FIXED and/or SCIF_MAP_KERNEL
+ *
+ * Return: the registered offset on success, negative errno on failure.
+ */
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot, int map_flags)
+{
+	scif_pinned_pages_t pinned_pages;
+	off_t err;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	s64 computed_offset;
+	struct scif_window *window;
+	struct mm_struct *mm = NULL;
+	struct device *spdev;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
+		epd, addr, len, offset, prot, map_flags);
+	/* Unsupported flags */
+	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
+		return -EINVAL;
+
+	/*
+	 * Offset is not page aligned/negative or offset+len
+	 * wraps around with SCIF_MAP_FIXED.
+	 */
+	if ((map_flags & SCIF_MAP_FIXED) &&
+	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
+	    (offset < 0) ||
+	    (offset + (off_t)len < offset)))
+		return -EINVAL;
+
+	/* Unsupported protection requested */
+	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
+		return -EINVAL;
+
+	/* addr/len must be page aligned. len should be non zero */
+	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
+	    (ALIGN(len, PAGE_SIZE) != len))
+		return -EINVAL;
+
+	might_sleep();
+
+	err = scif_verify_epd(ep);
+	if (err)
+		return err;
+
+	/* Compute the offset for this registration */
+	err = scif_get_window_offset(ep, map_flags, offset,
+				     len >> PAGE_SHIFT, &computed_offset);
+	if (err)
+		return err;
+
+	/* Released with scif_put_peer_dev() on every path below */
+	spdev = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(spdev)) {
+		err = PTR_ERR(spdev);
+		scif_free_window_offset(ep, NULL, computed_offset);
+		return err;
+	}
+	/* Allocate and prepare self registration window */
+	window = scif_create_window(ep, len >> PAGE_SHIFT,
+				    computed_offset, false);
+	if (!window) {
+		scif_free_window_offset(ep, NULL, computed_offset);
+		scif_put_peer_dev(spdev);
+		return -ENOMEM;
+	}
+
+	window->nr_pages = len >> PAGE_SHIFT;
+
+	err = scif_send_alloc_request(ep, window);
+	if (err) {
+		scif_destroy_incomplete_window(ep, window);
+		scif_put_peer_dev(spdev);
+		return err;
+	}
+
+	/* User memory: take an mm and request ulimit accounting of the pin */
+	if (!(map_flags & SCIF_MAP_KERNEL)) {
+		mm = __scif_acquire_mm();
+		map_flags |= SCIF_MAP_ULIMIT;
+	}
+	/* Pin down the pages */
+	err = __scif_pin_pages(addr, len, &prot,
+			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
+			       &pinned_pages);
+	if (err) {
+		/* No pages are attached to the window yet */
+		scif_destroy_incomplete_window(ep, window);
+		__scif_release_mm(mm);
+		goto error;
+	}
+
+	/* From here on the window carries the pinned pages and the mm */
+	window->pinned_pages = pinned_pages;
+	window->prot = pinned_pages->prot;
+	window->mm = mm;
+
+	/* Prepare the remote registration window */
+	err = scif_prep_remote_window(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	/* Tell the peer about the new window */
+	err = scif_send_scif_register(ep, window);
+	if (err) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %ld\n", __func__, __LINE__, err);
+		goto error_unmap;
+	}
+
+	scif_put_peer_dev(spdev);
+	/* No further failures expected. Insert new window */
+	scif_insert_local_window(window, ep);
+	dev_dbg(&ep->remote_dev->sdev->dev,
+		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
+		epd, addr, len, computed_offset);
+	return computed_offset;
+error_unmap:
+	scif_destroy_window(ep, window);
+error:
+	scif_put_peer_dev(spdev);
+	dev_err(&ep->remote_dev->sdev->dev,
+		"%s %d err %ld\n", __func__, __LINE__, err);
+	return err;
+}
+EXPORT_SYMBOL_GPL(scif_register);
+
+/*
+ * scif_unregister - unregister the windows covering a range of offsets
+ * @epd: endpoint descriptor
+ * @offset: page aligned start offset of the range
+ * @len: non zero, page aligned length of the range in bytes
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int
+scif_unregister(scif_epd_t epd, off_t offset, size_t len)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *found = NULL;
+	struct scif_rma_req query;
+	struct device *peer;
+	int npages, rc;
+
+	dev_dbg(scif_info.mdev.this_device,
+		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
+		ep, offset, len);
+	/* len must be page aligned. len should be non zero */
+	if (!len)
+		return -EINVAL;
+	if (ALIGN((u64)len, PAGE_SIZE) != (u64)len)
+		return -EINVAL;
+
+	/* Offset is not page aligned or offset+len wraps around */
+	if (ALIGN(offset, PAGE_SIZE) != offset)
+		return -EINVAL;
+	if (offset + (off_t)len < offset)
+		return -EINVAL;
+
+	rc = scif_verify_epd(ep);
+	if (rc)
+		return rc;
+
+	might_sleep();
+	npages = len >> PAGE_SHIFT;
+
+	/* Look for complete windows of the self registration list */
+	query.head = &ep->rma_info.reg_list;
+	query.type = SCIF_WINDOW_FULL;
+	query.offset = offset;
+	query.nr_bytes = len;
+	query.prot = 0;
+	query.out_window = &found;
+
+	peer = scif_get_peer_dev(ep->remote_dev);
+	if (IS_ERR(peer))
+		return PTR_ERR(peer);
+
+	mutex_lock(&ep->rma_info.rma_lock);
+	/* Does a valid window exist? */
+	rc = scif_query_window(&query);
+	if (rc) {
+		dev_err(&ep->remote_dev->sdev->dev,
+			"%s %d err %d\n", __func__, __LINE__, rc);
+	} else {
+		/* Unregister all the windows in this range */
+		rc = scif_rma_list_unregister(found, offset, npages);
+		if (rc)
+			dev_err(&ep->remote_dev->sdev->dev,
+				"%s %d err %d\n", __func__, __LINE__, rc);
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	scif_put_peer_dev(peer);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(scif_unregister);
diff --git a/kernel/drivers/misc/mic/scif/scif_rma.h b/kernel/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000..fa6722279
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_H
+#define SCIF_RMA_H
+
+#include <linux/dma_remapping.h>
+#include <linux/mmu_notifier.h>
+
+#include "../bus/scif_bus.h"
+
+/* If this bit is set then the mark is a remote fence mark */
+#define SCIF_REMOTE_FENCE_BIT          31
+/* Magic value used to indicate a remote fence request */
+#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
+
+/* 1 MiB */
+#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
+/* The maximum unaligned buffer size plus two L1 cache lines */
+#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
+				      (L1_CACHE_BYTES << 1))
+
+#define SCIF_IOVA_START_PFN		(1)
+/* Convert an address into a page frame number */
+#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
+/* Page frame numbers of the highest 64 bit and 63 bit DMA addresses */
+#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
+#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
+
+/*
+ * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
+ *
+ * @reg_list: List of registration windows for self
+ * @remote_reg_list: List of registration windows for peer
+ * @iovad: Offset generator
+ * @rma_lock: Synchronizes access to self/remote list and also protects the
+ *	      window from being destroyed while RMAs are in progress.
+ * @tc_lock: Synchronizes access to temporary cached windows list
+ *	     for SCIF Registration Caching.
+ * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
+ * @tw_refcount: Keeps track of number of outstanding temporary registered
+ *		 windows created by scif_vreadfrom/scif_vwriteto which have
+ *		 not been destroyed.
+ * @tcw_refcount: Same as tw_refcount but for temporary cached windows
+ * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
+ * @mmn_list: List of MMU notifiers so that we can destroy the windows
+ *	      when required
+ * @fence_refcount: Keeps track of number of outstanding remote fence
+ *		    requests which have been received by the peer.
+ * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
+ * @async_list_del: Detect asynchronous list entry deletion
+ * @vma_list: List of vmas with remote memory mappings
+ * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
+ */
+struct scif_endpt_rma_info {
+	struct list_head reg_list;
+	struct list_head remote_reg_list;
+	struct iova_domain iovad;
+	struct mutex rma_lock;
+	spinlock_t tc_lock;
+	struct mutex mmn_lock;
+	atomic_t tw_refcount;
+	atomic_t tcw_refcount;
+	atomic_t tcw_total_pages;
+	struct list_head mmn_list;
+	atomic_t fence_refcount;
+	struct dma_chan	*dma_chan;
+	int async_list_del;
+	struct list_head vma_list;
+	wait_queue_head_t markwq;
+};
+
+/*
+ * struct scif_fence_info - used for tracking fence requests
+ *
+ * @state: State of this transfer
+ * @comp: Fences wait on this completion
+ * @dma_mark: Used for storing the DMA mark
+ */
+struct scif_fence_info {
+	enum scif_msg_state state;
+	struct completion comp;
+	int dma_mark;
+};
+
+/*
+ * struct scif_remote_fence_info - used for tracking remote fence requests
+ *
+ * @msg: SCIF node QP fence message
+ * @list: Link to list of remote fence requests
+ */
+struct scif_remote_fence_info {
+	struct scifmsg msg;
+	struct list_head list;
+};
+
+/*
+ * Specifies whether an RMA operation can span across partial windows, a single
+ * window or multiple contiguous windows. Mmaps can span across partial windows.
+ * Unregistration can span across complete windows. scif_get_pages() can span a
+ * single window. A window can also be of type self or peer.
+ */
+enum scif_window_type {
+	SCIF_WINDOW_PARTIAL,	/* operation may cover parts of windows */
+	SCIF_WINDOW_SINGLE,	/* operation covers a single window */
+	SCIF_WINDOW_FULL,	/* operation covers complete windows */
+	SCIF_WINDOW_SELF,	/* window type: self */
+	SCIF_WINDOW_PEER	/* window type: peer */
+};
+
+/*
+ * The number of physical addresses that can be stored in a PAGE:
+ * 4K (0x1000) bytes divided by 8 bytes per address, i.e. 512.
+ */
+#define SCIF_NR_ADDR_IN_PAGE   (0x1000 >> 3)
+
+/*
+ * struct scif_rma_lookup - RMA lookup data structure for page list transfers
+ *
+ * Store an array of lookup offsets. Each offset in this array maps
+ * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
+ * offsets in a 4K page will correspond to 1GB of registered address space.
+ *
+ * @lookup: Array of offsets
+ * @offset: DMA offset of lookup array
+ */
+struct scif_rma_lookup {
+	dma_addr_t *lookup;
+	dma_addr_t offset;
+};
+
+/*
+ * struct scif_pinned_pages - A set of pinned pages obtained with
+ * scif_pin_pages() which could be part of multiple registered
+ * windows across different end points.
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @prot: read/write protections
+ * @map_flags: Flags specified during the pin operation
+ * @ref_count: Reference count; set to one when the pages are pinned,
+ * incremented by scif_register_pinned_pages() and decremented by
+ * scif_unpin_pages()
+ * @magic: A magic value, compared against SCIFEP_MAGIC to validate the object
+ * @pages: Array of pointers to struct pages populated with get_user_pages(..)
+ */
+struct scif_pinned_pages {
+	s64 nr_pages;
+	int prot;
+	int map_flags;
+	atomic_t ref_count;
+	u64 magic;
+	struct page **pages;
+};
+
+/*
+ * struct scif_status - Stores DMA status update information
+ *
+ * @src_dma_addr: Source buffer DMA address
+ * @val: Source location for the value to be written to the destination
+ * @ep: SCIF endpoint
+ */
+struct scif_status {
+	dma_addr_t src_dma_addr;
+	u64 val;
+	struct scif_endpt *ep;
+};
+
+/*
+ * struct scif_window - Registration Window for Self and Remote
+ *
+ * @nr_pages: Number of pages which is defined as a s64 instead of an int
+ * to avoid sign extension with buffers >= 2GB
+ * @nr_contig_chunks: Number of contiguous physical chunks
+ * @prot: read/write protections
+ * @ref_count: reference count in terms of number of pages
+ * @magic: Cookie to detect corruption
+ * @offset: registered offset
+ * @va_for_temp: va address that this window represents
+ * @dma_mark: Used to determine if all DMAs against the window are done
+ * @ep: Pointer to EP. Useful for passing EP around with messages to
+ *	avoid expensive list traversals.
+ * @list: link to list of windows for the endpoint
+ * @type: self or peer window
+ * @peer_window: Pointer to peer window. Useful for sending messages to peer
+ *		 without requiring an extra list traversal
+ * @unreg_state: unregistration state
+ * @offset_freed: True if the offset has been freed
+ * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @st: scatter gather table for DMA mappings with IOMMU enabled
+ * @pinned_pages: The set of pinned_pages backing this window
+ * @alloc_handle: Handle for sending ALLOC_REQ
+ * @regwq: Wait Queue for an registration (N)ACK
+ * @reg_state: Registration state
+ * @unregwq: Wait Queue for an unregistration (N)ACK
+ * @dma_addr_lookup: Lookup for physical addresses used for DMA
+ * @num_pages_lookup: presumably the lookup for @num_pages, mirroring
+ *		      @dma_addr_lookup - TODO confirm
+ * @nr_lookup: Number of entries in lookup
+ * @mapped_offset: Offset used to map the window by the peer
+ * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
+ * @num_pages: Array specifying number of pages for each physical address
+ */
+struct scif_window {
+	s64 nr_pages;
+	int nr_contig_chunks;
+	int prot;
+	int ref_count;
+	u64 magic;
+	s64 offset;
+	unsigned long va_for_temp;
+	int dma_mark;
+	u64 ep;
+	struct list_head list;
+	enum scif_window_type type;
+	u64 peer_window;
+	enum scif_msg_state unreg_state;
+	bool offset_freed;
+	bool temp;
+	struct mm_struct *mm;
+	struct sg_table *st;
+	/* The two anonymous structs below share storage */
+	union {
+		struct {
+			struct scif_pinned_pages *pinned_pages;
+			struct scif_allocmsg alloc_handle;
+			wait_queue_head_t regwq;
+			enum scif_msg_state reg_state;
+			wait_queue_head_t unregwq;
+		};
+		struct {
+			struct scif_rma_lookup dma_addr_lookup;
+			struct scif_rma_lookup num_pages_lookup;
+			int nr_lookup;
+			dma_addr_t mapped_offset;
+		};
+	};
+	dma_addr_t *dma_addr;
+	u64 *num_pages;
+} __packed;
+
+/*
+ * struct scif_mmu_notif - SCIF mmu notifier information
+ *
+ * @ep_mmu_notifier: MMU notifier operations; only present when
+ *		     CONFIG_MMU_NOTIFIER is enabled
+ * @tc_reg_list: List of temp registration windows for self
+ * @mm: memory descriptor for the task_struct which initiated the RMA
+ * @ep: SCIF endpoint
+ * @list: link to list of MMU notifier information
+ */
+struct scif_mmu_notif {
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier ep_mmu_notifier;
+#endif
+	struct list_head tc_reg_list;
+	struct mm_struct *mm;
+	struct scif_endpt *ep;
+	struct list_head list;
+};
+
+/* Direction of an RMA transfer */
+enum scif_rma_dir {
+	SCIF_LOCAL_TO_REMOTE,
+	SCIF_REMOTE_TO_LOCAL
+};
+
+extern struct kmem_cache *unaligned_cache;
+/* Initialize RMA for this EP */
+void scif_rma_ep_init(struct scif_endpt *ep);
+/* Check if epd can be uninitialized */
+int scif_rma_ep_can_uninit(struct scif_endpt *ep);
+/* Obtain a new offset. Callee must grab RMA lock */
+int scif_get_window_offset(struct scif_endpt *ep, int flags,
+			   s64 offset, int nr_pages, s64 *out_offset);
+/* Free offset. Callee must grab RMA lock */
+void scif_free_window_offset(struct scif_endpt *ep,
+			     struct scif_window *window, s64 offset);
+/* Create self registration window */
+struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
+				       s64 offset, bool temp);
+/* Destroy self registration window.*/
+int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
+void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
+/* Map pages of self window to Aperture/PCI */
+int scif_map_window(struct scif_dev *remote_dev,
+		    struct scif_window *window);
+/* Unregister a self window */
+int scif_unregister_window(struct scif_window *window);
+/* Destroy remote registration window */
+void
+scif_destroy_remote_window(struct scif_window *window);
+/* remove valid remote memory mappings from process address space */
+void scif_zap_mmaps(int node);
+/* Query if any applications have remote memory mappings */
+bool scif_rma_do_apps_have_mmaps(int node);
+/* Cleanup remote registration lists for zombie endpoints */
+void scif_cleanup_rma_for_zombies(int node);
+/* Reserve a DMA channel for a particular endpoint */
+int scif_reserve_dma_chan(struct scif_endpt *ep);
+/* Setup a DMA mark for an endpoint */
+int _scif_fence_mark(scif_epd_t epd, int *mark);
+int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
+		     enum scif_window_type type);
+void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
+void scif_mmu_notif_handler(struct work_struct *work);
+void scif_rma_handle_remote_fences(void);
+void scif_rma_destroy_windows(void);
+void scif_rma_destroy_tcw_invalid(void);
+int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
+
+/*
+ * struct scif_window_iter - iterator state passed to scif_off_to_dma_addr()
+ *
+ * @offset: starts at the window's registered offset
+ * @index: starts at 0; presumably an index into the window's chunk
+ *	   arrays - TODO confirm against scif_off_to_dma_addr()
+ */
+struct scif_window_iter {
+	s64 offset;
+	int index;
+};
+
+/* Reset @iter to index 0 at the registered offset of @window */
+static inline void
+scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
+{
+	iter->index = 0;
+	iter->offset = window->offset;
+}
+
+dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
+				size_t *nr_bytes,
+				struct scif_window_iter *iter);
+/*
+ * Convenience form of scif_off_to_dma_addr() which passes NULL for both
+ * the nr_bytes and the iterator arguments.
+ */
+static inline
+dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
+{
+	return scif_off_to_dma_addr(window, off, NULL, NULL);
+}
+
+/*
+ * Return true when the two offsets do not sit at the same position
+ * within an L1 cache line.
+ */
+static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
+{
+	const off_t line_mask = L1_CACHE_BYTES - 1;
+
+	return (src_offset & line_mask) != (dst_offset & line_mask);
+}
+
+/*
+ * scif_zalloc:
+ * @size: Size of the allocation request.
+ *
+ * Allocate zeroed memory rounded up to whole pages. __get_free_pages(..)
+ * is tried first when the order is below MAX_ORDER; vzalloc(..) is the
+ * fallback. Memory obtained here must be released with scif_free().
+ */
+static inline void *scif_zalloc(size_t size)
+{
+	size_t bytes = ALIGN(size, PAGE_SIZE);
+
+	if (bytes && get_order(bytes) < MAX_ORDER) {
+		unsigned long pages;
+
+		pages = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					 get_order(bytes));
+		if (pages)
+			return (void *)pages;
+	}
+	return vzalloc(bytes);
+}
+
+/*
+ * scif_free:
+ * @addr: Address to be freed.
+ * @size: Size of the allocation, as passed to scif_zalloc().
+ *
+ * Release memory allocated via scif_zalloc(), using vfree(..) for
+ * vmalloc addresses and free_pages(..) otherwise.
+ */
+static inline void scif_free(void *addr, size_t size)
+{
+	if (is_vmalloc_addr(addr)) {
+		vfree(addr);
+		return;
+	}
+	free_pages((unsigned long)addr, get_order(ALIGN(size, PAGE_SIZE)));
+}
+
+/* Add @nr_pages references to @window; ref_count is a plain int */
+static inline void scif_get_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count += nr_pages;
+}
+
+/* Drop @nr_pages references from @window; nothing is freed here */
+static inline void scif_put_window(struct scif_window *window, int nr_pages)
+{
+	window->ref_count -= nr_pages;
+}
+
+/* Overwrite the reference count of @window with @nr_pages */
+static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
+{
+	window->ref_count = nr_pages;
+}
+
+/*
+ * Append @window to @list under scif_info.rmalock and schedule
+ * scif_info.misc_work.
+ */
+static inline void
+scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
+{
+	spin_lock(&scif_info.rmalock);
+	list_add_tail(&window->list, list);
+	spin_unlock(&scif_info.rmalock);
+	schedule_work(&scif_info.misc_work);
+}
+
+/*
+ * Unlink @window from the list it is currently on and queue it on
+ * scif_info.rma_tc for cleanup.
+ */
+static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
+{
+	list_del_init(&window->list);
+	scif_queue_for_cleanup(window, &scif_info.rma_tc);
+}
+
+/*
+ * Report intel_iommu_enabled when CONFIG_INTEL_IOMMU is set; always false
+ * otherwise.
+ */
+static inline bool scif_is_iommu_enabled(void)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	return intel_iommu_enabled;
+#else
+	return false;
+#endif
+}
+#endif /* SCIF_RMA_H */
diff --git a/kernel/drivers/misc/mic/scif/scif_rma_list.c b/kernel/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000..e1ef8daed
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#include "scif_main.h"
+#include <linux/mmu_notifier.h>
+#include <linux/highmem.h>
+
+/*
+ * scif_insert_tcw:
+ *
+ * Add a temp window to the temp registration list, which is kept in
+ * ascending va_for_temp order.
+ * RMA lock must be held.
+ */
+void scif_insert_tcw(struct scif_window *window, struct list_head *head)
+{
+	struct list_head *after = head;	/* node the new window goes behind */
+	struct scif_window *curr;
+
+	INIT_LIST_HEAD(&window->list);
+	/* Fast path: a VA above the current tail is simply appended. */
+	if (!list_empty(head)) {
+		curr = list_last_entry(head, struct scif_window, list);
+		if (curr->va_for_temp < window->va_for_temp) {
+			list_add_tail(&window->list, head);
+			return;
+		}
+	}
+	/* Otherwise go behind the last entry whose VA is not above ours. */
+	list_for_each_entry(curr, head, list) {
+		if (curr->va_for_temp > window->va_for_temp)
+			break;
+		after = &curr->list;
+	}
+	list_add(&window->list, after);
+}
+
+/*
+ * scif_insert_window:
+ *
+ * Add a window to the self registration list, which is kept in ascending
+ * offset order, and set its reference count to its page count.
+ * RMA lock must be held.
+ */
+void scif_insert_window(struct scif_window *window, struct list_head *head)
+{
+	struct list_head *after = head;	/* head itself: insert at the front */
+	struct scif_window *curr;
+
+	INIT_LIST_HEAD(&window->list);
+	/* Advance past every entry whose offset is not above the new one. */
+	list_for_each_entry(curr, head, list) {
+		if (curr->offset > window->offset)
+			break;
+		after = &curr->list;
+	}
+
+	list_add(&window->list, after);
+	/* ref_count starts out equal to the number of pages in the window. */
+	scif_set_window_ref(window, window->nr_pages);
+}
+
+/*
+ * scif_query_tcw:
+ *
+ * Search req->head for a temp cached window overlapping the requested VA
+ * range. A window that fully contains the range with matching permissions
+ * is returned via req->out_window (0); any other overlap queues the old
+ * window for cleanup and returns -ENXIO. RMA lock must be held.
+ */
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
+{
+	struct list_head *item, *temp, *head = req->head;
+	struct scif_window *window;
+	u64 start_va_window, start_va_req = req->va_for_temp;
+	u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+	/*
+	 * The list is sorted by VA, so a request that starts beyond the end
+	 * of the last window cannot overlap any entry: skip the traversal.
+	 */
+	if (!list_empty(head)) {
+		window = list_last_entry(head, struct scif_window, list);
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req > end_va_window)
+			return -ENXIO;
+	}
+	list_for_each_safe(item, temp, head) {
+		window = list_entry(item, struct scif_window, list);
+		start_va_window = window->va_for_temp;
+		end_va_window = window->va_for_temp +
+			(window->nr_pages << PAGE_SHIFT);
+		if (start_va_req < start_va_window &&
+		    end_va_req < start_va_window)
+			break;	/* request lies wholly below this window */
+		if (start_va_req >= end_va_window)
+			continue;	/* request lies above this window */
+		if ((window->prot & req->prot) == req->prot) {
+			if (start_va_req >= start_va_window &&
+			    end_va_req <= end_va_window) {
+				*req->out_window = window;
+				return 0;
+			}
+			/* Partial overlap: rewrite req's start and length. */
+			if (start_va_req < start_va_window) {
+				req->nr_bytes +=
+					start_va_window - start_va_req;
+				req->va_for_temp = start_va_window;
+			}
+			if (end_va_req >= end_va_window) /* NOTE(review): delta <= 0 */
+				req->nr_bytes += end_va_window - end_va_req;
+		}
+		/* Any other overlap: queue the old window for cleanup */
+		__scif_rma_destroy_tcw_helper(window);
+		break;
+	}
+	return -ENXIO;
+}
+
+/*
+ * scif_query_window:
+ *
+ * Check that [req->offset, req->offset + req->nr_bytes) is covered by
+ * contiguous windows on req->head that all grant req->prot; the first
+ * such window is stored in *req->out_window. RMA lock must be held.
+ */
+int scif_query_window(struct scif_rma_req *req)
+{
+	struct list_head *item;
+	struct scif_window *window;
+	s64 end_offset, offset = req->offset;
+	u64 tmp_min, nr_bytes_left = req->nr_bytes;
+
+	if (!req->nr_bytes)
+		return -EINVAL;
+
+	list_for_each(item, req->head) {
+		window = list_entry(item, struct scif_window, list);
+		end_offset = window->offset +
+			(window->nr_pages << PAGE_SHIFT);
+		if (offset < window->offset)
+			/* Gap before this window: offset is not covered */
+			return -ENXIO;
+		if (offset >= end_offset)
+			continue;
+		/* Check read/write protections. */
+		if ((window->prot & req->prot) != req->prot)
+			return -EPERM;
+		if (nr_bytes_left == req->nr_bytes)
+			/* Nothing consumed yet: this is the first window */
+			*req->out_window = window;
+		tmp_min = min((u64)end_offset - offset, nr_bytes_left);
+		nr_bytes_left -= tmp_min;
+		offset += tmp_min;
+		/*
+		 * Nothing left to cover: the outcome now depends only on
+		 * the requested window type.
+		 */
+		if (!nr_bytes_left) {
+			/* Partial and single requests are done here */
+			if (req->type == SCIF_WINDOW_PARTIAL ||
+			    req->type == SCIF_WINDOW_SINGLE)
+				return 0;
+			/* Otherwise the range must end exactly at a window end */
+			if (offset == end_offset)
+				/* Spanning multiple whole windows */
+				return 0;
+			/* Not spanning multiple whole windows */
+			return -ENXIO;
+		}
+		if (req->type == SCIF_WINDOW_SINGLE)
+			break;	/* must fit in one window */
+	}
+	dev_err(scif_info.mdev.this_device,
+		"%s %d ENXIO\n", __func__, __LINE__);
+	return -ENXIO;
+}
+
+/*
+ * scif_rma_list_unregister:
+ *
+ * Starting at window, call scif_unregister_window(..) on successive
+ * entries of the endpoint's reg_list until nr_pages pages are consumed;
+ * the first failure is returned. RMA lock must be held.
+ */
+int scif_rma_list_unregister(struct scif_window *window,
+			     s64 offset, int nr_pages)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
+	struct list_head *head = &ep->rma_info.reg_list;
+	s64 end_offset;
+	int err = 0;
+	int loop_nr_pages;	/* pages this window contributes to the range */
+	struct scif_window *_window;	/* lookahead cursor for the _safe walk */
+
+	list_for_each_entry_safe_from(window, _window, head, list) {
+		end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
+		loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
+				    nr_pages);
+		err = scif_unregister_window(window);
+		if (err)
+			return err;
+		nr_pages -= loop_nr_pages;
+		offset += (loop_nr_pages << PAGE_SHIFT);
+		if (!nr_pages)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * scif_unmap_all_windows:
+ *
+ * Walk the self registration list and call scif_unmap_window(..) on
+ * every window to delete any DMA mappings created.
+ * Takes the endpoint's RMA lock for the duration of the walk.
+ */
+void scif_unmap_all_windows(scif_epd_t epd)
+{
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct scif_window *window, *next;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+
+	/* _safe variant: the next entry is sampled before each call. */
+	list_for_each_entry_safe(window, next, &ep->rma_info.reg_list, list)
+		scif_unmap_window(ep->remote_dev, window);
+
+	mutex_unlock(&ep->rma_info.rma_lock);
+}
+
+/*
+ * scif_unregister_all_windows:
+ *
+ * Call scif_unregister_window(..) on every window in the self
+ * registration list. The RMA lock is taken (and released) here.
+ * Returns the result of the last call, or 0 for an empty list.
+ */
+int scif_unregister_all_windows(scif_epd_t epd)
+{
+	struct list_head *item, *tmp;
+	struct scif_window *window;
+	struct scif_endpt *ep = (struct scif_endpt *)epd;
+	struct list_head *head = &ep->rma_info.reg_list;
+	int err = 0;
+
+	mutex_lock(&ep->rma_info.rma_lock);
+retry:
+	item = NULL;
+	tmp = NULL;
+	list_for_each_safe(item, tmp, head) {
+		window = list_entry(item, struct scif_window, list);
+		ep->rma_info.async_list_del = 0;	/* cleared before each call */
+		err = scif_unregister_window(window);
+		if (err)	/* logged only; the walk carries on */
+			dev_err(scif_info.mdev.this_device,
+				"%s %d err %d\n",
+				__func__, __LINE__, err);
+		/*
+		 * Need to restart list traversal if there has been
+		 * an asynchronous list entry deletion.
+		 */
+		if (ACCESS_ONCE(ep->rma_info.async_list_del))
+			goto retry;
+	}
+	mutex_unlock(&ep->rma_info.rma_lock);
+	if (!list_empty(&ep->rma_info.mmn_list)) {
+		spin_lock(&scif_info.rmalock);
+		list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
+		spin_unlock(&scif_info.rmalock);
+		schedule_work(&scif_info.mmu_notif_work);
+	}
+	return err;
+}
diff --git a/kernel/drivers/misc/mic/scif/scif_rma_list.h b/kernel/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000..7d58d1d55
--- /dev/null
+++ b/kernel/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef SCIF_RMA_LIST_H
+#define SCIF_RMA_LIST_H
+
+/*
+ * struct scif_rma_req - Self Registration list RMA Request query
+ *
+ * @out_window: Returns the window if found
+ * @offset: Starting offset (shares storage with @va_for_temp)
+ * @nr_bytes: number of bytes
+ * @prot: protection requested i.e. read or write or both
+ * @type: Specify single, partial or multiple windows
+ * @head: Head of list on which to search
+ * @va_for_temp: VA for searching temporary cached windows
+ */
+struct scif_rma_req {
+	struct scif_window **out_window;
+	union {	/* only one of the two is meaningful per request */
+		s64 offset;
+		unsigned long va_for_temp;
+	};
+	size_t nr_bytes;
+	int prot;
+	enum scif_window_type type;
+	struct list_head *head;
+};
+
+/* Insert */
+void scif_insert_window(struct scif_window *window, struct list_head *head);
+void scif_insert_tcw(struct scif_window *window,
+		     struct list_head *head);
+/* Query */
+int scif_query_window(struct scif_rma_req *request);
+int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
+/* Called from close to unregister all self windows */
+int scif_unregister_all_windows(scif_epd_t epd);
+void scif_unmap_all_windows(scif_epd_t epd);
+/* Traverse list and unregister */
+int scif_rma_list_unregister(struct scif_window *window, s64 offset,
+			     int nr_pages);
+#endif /* SCIF_RMA_LIST_H */
diff --git a/kernel/drivers/misc/qcom-coincell.c b/kernel/drivers/misc/qcom-coincell.c
new file mode 100644
index 000000000..7b4a2da48
--- /dev/null
+++ b/kernel/drivers/misc/qcom-coincell.c
@@ -0,0 +1,152 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+struct qcom_coincell {
+	struct device	*dev;		/* used as the dev_err() target */
+	struct regmap	*regmap;	/* all register writes go through this */
+	u32		base_addr;	/* added to each register offset */
+};
+
+#define QCOM_COINCELL_REG_RSET		0x44	/* offsets from base_addr */
+#define QCOM_COINCELL_REG_VSET		0x45
+#define QCOM_COINCELL_REG_ENABLE	0x46
+
+#define QCOM_COINCELL_ENABLE		BIT(7)
+
+static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 };	/* ohms */
+static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 };	/* mV */
+/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */
+
+/* Program the charger; rset and vset are only looked at when enable is set */
+static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset,
+				     int vset, bool enable)
+{
+	const u32 base = chgr->base_addr;
+	int rsel, vsel, err;
+
+	/* Disabling is a single write; no table lookups are needed. */
+	if (!enable)
+		return regmap_write(chgr->regmap,
+				    base + QCOM_COINCELL_REG_ENABLE, 0);
+
+	/* The register value is the table index of the requested resistor. */
+	rsel = 0;
+	while (rsel < ARRAY_SIZE(qcom_rset_map) && qcom_rset_map[rsel] != rset)
+		rsel++;
+
+	if (rsel >= ARRAY_SIZE(qcom_rset_map)) {
+		dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset);
+		return -EINVAL;
+	}
+
+	/* Same scheme for the charge voltage. */
+	vsel = 0;
+	while (vsel < ARRAY_SIZE(qcom_vset_map) && qcom_vset_map[vsel] != vset)
+		vsel++;
+
+	if (vsel >= ARRAY_SIZE(qcom_vset_map)) {
+		dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset);
+		return -EINVAL;
+	}
+
+	err = regmap_write(chgr->regmap,
+			   base + QCOM_COINCELL_REG_RSET, rsel);
+	if (err) {
+		/*
+		 * Most likely a bad base_addr (reg) in the dts. Other
+		 * register write failures should be extremely rare, or
+		 * point at problems that are reported elsewhere
+		 * (eg. spmi failure).
+		 */
+		dev_err(chgr->dev, "could not write to RSET register\n");
+		return err;
+	}
+
+	err = regmap_write(chgr->regmap, base + QCOM_COINCELL_REG_VSET, vsel);
+	if (err)
+		return err;
+
+	/* Both settings are written; the ENABLE bit goes in last. */
+	return regmap_write(chgr->regmap,
+			    base + QCOM_COINCELL_REG_ENABLE,
+			    QCOM_COINCELL_ENABLE);
+}
+
+/* Read the charger settings from the device tree and program the PMIC. */
+static int qcom_coincell_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct qcom_coincell chgr;
+	u32 rset = 0, vset = 0;	/* unused if the charger is disabled */
+	bool enable;
+	int rc;
+
+	chgr.dev = &pdev->dev;
+
+	chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!chgr.regmap) {
+		dev_err(chgr.dev, "Unable to get regmap\n");
+		return -EINVAL;
+	}
+
+	rc = of_property_read_u32(node, "reg", &chgr.base_addr);
+	if (rc)
+		return rc;
+
+	enable = !of_property_read_bool(node, "qcom,charger-disable");
+
+	/* rset/vset are mandatory only when the charger is to be enabled. */
+	if (enable) {
+		rc = of_property_read_u32(node, "qcom,rset-ohms", &rset);
+		if (rc) {
+			dev_err(chgr.dev, "can't find 'qcom,rset-ohms' in DT block\n");
+			return rc;
+		}
+
+		rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset);
+		if (rc) {
+			dev_err(chgr.dev, "can't find 'qcom,vset-millivolts' in DT block\n");
+			return rc;
+		}
+	}
+
+	return qcom_coincell_chgr_config(&chgr, rset, vset, enable);
+}
+
+static const struct of_device_id qcom_coincell_match_table[] = {
+	{ .compatible = "qcom,pm8941-coincell", },
+	{}	/* empty terminating entry */
+};
+
+MODULE_DEVICE_TABLE(of, qcom_coincell_match_table);
+
+static struct platform_driver qcom_coincell_driver = {
+	.driver	= {
+		.name		= "qcom-spmi-coincell",
+		.of_match_table	= qcom_coincell_match_table,
+	},
+	.probe		= qcom_coincell_probe,	/* no .remove callback is set */
+};
+
+module_platform_driver(qcom_coincell_driver);
+
+MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.c b/kernel/drivers/misc/sgi-gru/gruhandles.c
index 2f30badc6..1ee8e82ba 100644
--- a/kernel/drivers/misc/sgi-gru/gruhandles.c
+++ b/kernel/drivers/misc/sgi-gru/gruhandles.c
@@ -196,12 +196,6 @@ void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
 	start_instruction(tfh);
 }
 
-void tfh_restart(struct gru_tlb_fault_handle *tfh)
-{
-	tfh->opc = TFHOP_RESTART;
-	start_instruction(tfh);
-}
-
 void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
 {
 	tfh->opc = TFHOP_USER_POLLING_MODE;
diff --git a/kernel/drivers/misc/sgi-gru/gruhandles.h b/kernel/drivers/misc/sgi-gru/gruhandles.h
index 3f998b924..3d7bd36a1 100644
--- a/kernel/drivers/misc/sgi-gru/gruhandles.h
+++ b/kernel/drivers/misc/sgi-gru/gruhandles.h
@@ -524,7 +524,6 @@ int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
 	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
 void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
 	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
-void tfh_restart(struct gru_tlb_fault_handle *tfh);
 void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
 void tfh_exception(struct gru_tlb_fault_handle *tfh);
 
diff --git a/kernel/drivers/misc/sgi-gru/grukdump.c b/kernel/drivers/misc/sgi-gru/grukdump.c
index a3700a56b..313da3150 100644
--- a/kernel/drivers/misc/sgi-gru/grukdump.c
+++ b/kernel/drivers/misc/sgi-gru/grukdump.c
@@ -78,11 +78,10 @@ static int gru_dump_tfm(struct gru_state *gru,
 		void __user *ubuf, void __user *ubufend)
 {
 	struct gru_tlb_fault_map *tfm;
-	int i, ret, bytes;
+	int i;
 
-	bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
-	if (bytes > ubufend - ubuf)
-		ret = -EFBIG;
+	if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+		return -EFBIG;
 
 	for (i = 0; i < GRU_NUM_TFM; i++) {
 		tfm = get_tfm(gru->gs_gru_base_vaddr, i);
@@ -99,11 +98,10 @@ static int gru_dump_tgh(struct gru_state *gru,
 		void __user *ubuf, void __user *ubufend)
 {
 	struct gru_tlb_global_handle *tgh;
-	int i, ret, bytes;
+	int i;
 
-	bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
-	if (bytes > ubufend - ubuf)
-		ret = -EFBIG;
+	if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
+		return -EFBIG;
 
 	for (i = 0; i < GRU_NUM_TGH; i++) {
 		tgh = get_tgh(gru->gs_gru_base_vaddr, i);
@@ -196,7 +194,7 @@ int gru_dump_chiplet_request(unsigned long arg)
 		return -EFAULT;
 
 	/* Currently, only dump by gid is implemented */
-	if (req.gid >= gru_max_gids || req.gid < 0)
+	if (req.gid >= gru_max_gids)
 		return -EINVAL;
 
 	gru = GID_TO_GRU(req.gid);
diff --git a/kernel/drivers/misc/sgi-gru/grukservices.c b/kernel/drivers/misc/sgi-gru/grukservices.c
index 913de07e5..967b9dd24 100644
--- a/kernel/drivers/misc/sgi-gru/grukservices.c
+++ b/kernel/drivers/misc/sgi-gru/grukservices.c
@@ -160,7 +160,12 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
 	down_write(&bs->bs_kgts_sema);
 
 	if (!bs->bs_kgts) {
-		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+		do {
+			bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+			if (!IS_ERR(bs->bs_kgts))
+				break;
+			msleep(1);
+		} while (true);
 		bs->bs_kgts->ts_user_blade_id = blade_id;
 	}
 	kgts = bs->bs_kgts;
@@ -429,8 +434,8 @@ int gru_get_cb_exception_detail(void *cb,
 	return 0;
 }
 
-char *gru_get_cb_exception_detail_str(int ret, void *cb,
-				      char *buf, int size)
+static char *gru_get_cb_exception_detail_str(int ret, void *cb,
+					     char *buf, int size)
 {
 	struct gru_control_block_status *gen = (void *)cb;
 	struct control_block_extended_exc_detail excdet;
@@ -505,7 +510,7 @@ int gru_wait_proc(void *cb)
 	return ret;
 }
 
-void gru_abort(int ret, void *cb, char *str)
+static void gru_abort(int ret, void *cb, char *str)
 {
 	char buf[GRU_EXC_STR_SIZE];
 
@@ -997,7 +1002,6 @@ static int quicktest1(unsigned long arg)
 {
 	struct gru_message_queue_desc mqd;
 	void *p, *mq;
-	unsigned long *dw;
 	int i, ret = -EIO;
 	char mes[GRU_CACHE_LINE_BYTES], *m;
 
@@ -1007,7 +1011,6 @@ static int quicktest1(unsigned long arg)
 		return -ENOMEM;
 	mq = ALIGNUP(p, 1024);
 	memset(mes, 0xee, sizeof(mes));
-	dw = mq;
 
 	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
 	for (i = 0; i < 6; i++) {
diff --git a/kernel/drivers/misc/sgi-gru/grumain.c b/kernel/drivers/misc/sgi-gru/grumain.c
index ae16c8cb4..1525870f4 100644
--- a/kernel/drivers/misc/sgi-gru/grumain.c
+++ b/kernel/drivers/misc/sgi-gru/grumain.c
@@ -930,6 +930,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct gru_thread_state *gts;
 	unsigned long paddr, vaddr;
+	unsigned long expires;
 
 	vaddr = (unsigned long)vmf->virtual_address;
 	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -954,7 +955,8 @@ again:
 			mutex_unlock(&gts->ts_ctxlock);
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
-			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+			expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
+			if (time_before(expires, jiffies))
 				gru_steal_context(gts);
 			goto again;
 		}
diff --git a/kernel/drivers/misc/sgi-gru/grutlbpurge.c b/kernel/drivers/misc/sgi-gru/grutlbpurge.c
index 2129274ef..e936d4389 100644
--- a/kernel/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/kernel/drivers/misc/sgi-gru/grutlbpurge.c
@@ -306,19 +306,20 @@ struct gru_mm_struct *gru_register_mmu_notifier(void)
 		atomic_inc(&gms->ms_refcnt);
 	} else {
 		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
-		if (gms) {
-			STAT(gms_alloc);
-			spin_lock_init(&gms->ms_asid_lock);
-			gms->ms_notifier.ops = &gru_mmuops;
-			atomic_set(&gms->ms_refcnt, 1);
-			init_waitqueue_head(&gms->ms_wait_queue);
-			err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
-			if (err)
-				goto error;
-		}
+		if (!gms)
+			return ERR_PTR(-ENOMEM);
+		STAT(gms_alloc);
+		spin_lock_init(&gms->ms_asid_lock);
+		gms->ms_notifier.ops = &gru_mmuops;
+		atomic_set(&gms->ms_refcnt, 1);
+		init_waitqueue_head(&gms->ms_wait_queue);
+		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+		if (err)
+			goto error;
 	}
-	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
-		atomic_read(&gms->ms_refcnt));
+	if (gms)
+		gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+			atomic_read(&gms->ms_refcnt));
 	return gms;
 error:
 	kfree(gms);
diff --git a/kernel/drivers/misc/sgi-xp/xpc_uv.c b/kernel/drivers/misc/sgi-xp/xpc_uv.c
index 95c894482..340b44d9e 100644
--- a/kernel/drivers/misc/sgi-xp/xpc_uv.c
+++ b/kernel/drivers/misc/sgi-xp/xpc_uv.c
@@ -239,7 +239,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
 
 	nid = cpu_to_node(cpu);
-	page = alloc_pages_exact_node(nid,
+	page = __alloc_pages_node(nid,
 				      GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
 				      pg_order);
 	if (page == NULL) {
diff --git a/kernel/drivers/misc/spear13xx_pcie_gadget.c b/kernel/drivers/misc/spear13xx_pcie_gadget.c
index fe3ad0ca9..ee120dcbb 100644
--- a/kernel/drivers/misc/spear13xx_pcie_gadget.c
+++ b/kernel/drivers/misc/spear13xx_pcie_gadget.c
@@ -2,7 +2,7 @@
  * drivers/misc/spear13xx_pcie_gadget.c
  *
  * Copyright (C) 2010 ST Microelectronics
- * Pratyush Anand<pratyush.anand@st.com>
+ * Pratyush Anand<pratyush.anand@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -220,11 +220,17 @@ static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id)
 /*
  * configfs interfaces show/store functions
  */
-static ssize_t pcie_gadget_show_link(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+
+static struct pcie_gadget_target *to_target(struct config_item *item)
 {
-	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	return item ?
+		container_of(to_configfs_subsystem(to_config_group(item)),
+				struct pcie_gadget_target, subsys) : NULL;
+}
+
+static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf)
+{
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
 
 	if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID))
 		return sprintf(buf, "UP");
@@ -232,11 +238,10 @@ static ssize_t pcie_gadget_show_link(
 		return sprintf(buf, "DOWN");
 }
 
-static ssize_t pcie_gadget_store_link(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_link_store(struct config_item *item,
 		const char *buf, size_t count)
 {
-	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
 
 	if (sysfs_streq(buf, "UP"))
 		writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID),
@@ -250,17 +255,15 @@ static ssize_t pcie_gadget_store_link(
 	return count;
 }
 
-static ssize_t pcie_gadget_show_int_type(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf)
 {
-	return sprintf(buf, "%s", config->int_type);
+	return sprintf(buf, "%s", to_target(item)->int_type);
 }
 
-static ssize_t pcie_gadget_store_int_type(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_int_type_store(struct config_item *item,
 		const char *buf, size_t count)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	u32 cap, vec, flags;
 	ulong vector;
 
@@ -288,11 +291,10 @@ static ssize_t pcie_gadget_store_int_type(
 	return count;
 }
 
-static ssize_t pcie_gadget_show_no_of_msi(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf)
 {
-	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	struct spear_pcie_gadget_config *config = to_target(item)
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
 	u32 cap, vec, flags;
 	ulong vector;
 
@@ -313,13 +315,12 @@ static ssize_t pcie_gadget_show_no_of_msi(
 	return sprintf(buf, "%lu", vector);
 }
 
-static ssize_t pcie_gadget_store_no_of_msi(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item,
 		const char *buf, size_t count)
 {
 	int ret;
 
-	ret = kstrtoul(buf, 0, &config->requested_msi);
+	ret = kstrtoul(buf, 0, &to_target(item)->requested_msi);
 	if (ret)
 		return ret;
 
@@ -329,11 +330,10 @@ static ssize_t pcie_gadget_store_no_of_msi(
 	return count;
 }
 
-static ssize_t pcie_gadget_store_inta(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_inta_store(struct config_item *item,
 		const char *buf, size_t count)
 {
-	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
 	ulong en;
 	int ret;
 
@@ -351,10 +351,10 @@ static ssize_t pcie_gadget_store_inta(
 	return count;
 }
 
-static ssize_t pcie_gadget_store_send_msi(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_send_msi_store(struct config_item *item,
 		const char *buf, size_t count)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
 	ulong vector;
 	u32 ven_msi;
@@ -388,19 +388,16 @@ static ssize_t pcie_gadget_store_send_msi(
 	return count;
 }
 
-static ssize_t pcie_gadget_show_vendor_id(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf)
 {
 	u32 id;
 
-	spear_dbi_read_reg(config, PCI_VENDOR_ID, 2, &id);
+	spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id);
 
 	return sprintf(buf, "%x", id);
 }
 
-static ssize_t pcie_gadget_store_vendor_id(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_vendor_id_store(struct config_item *item,
 		const char *buf, size_t count)
 {
 	ulong id;
@@ -410,24 +407,21 @@ static ssize_t pcie_gadget_store_vendor_id(
 	if (ret)
 		return ret;
 
-	spear_dbi_write_reg(config, PCI_VENDOR_ID, 2, id);
+	spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id);
 
 	return count;
 }
 
-static ssize_t pcie_gadget_show_device_id(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf)
 {
 	u32 id;
 
-	spear_dbi_read_reg(config, PCI_DEVICE_ID, 2, &id);
+	spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id);
 
 	return sprintf(buf, "%x", id);
 }
 
-static ssize_t pcie_gadget_store_device_id(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_device_id_store(struct config_item *item,
 		const char *buf, size_t count)
 {
 	ulong id;
@@ -437,22 +431,20 @@ static ssize_t pcie_gadget_store_device_id(
 	if (ret)
 		return ret;
 
-	spear_dbi_write_reg(config, PCI_DEVICE_ID, 2, id);
+	spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id);
 
 	return count;
 }
 
-static ssize_t pcie_gadget_show_bar0_size(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf)
 {
-	return sprintf(buf, "%lx", config->bar0_size);
+	return sprintf(buf, "%lx", to_target(item)->bar0_size);
 }
 
-static ssize_t pcie_gadget_store_bar0_size(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_size_store(struct config_item *item,
 		const char *buf, size_t count)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	ulong size;
 	u32 pos, pos1;
 	u32 no_of_bit = 0;
@@ -489,21 +481,20 @@ static ssize_t pcie_gadget_store_bar0_size(
 	return count;
 }
 
-static ssize_t pcie_gadget_show_bar0_address(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_address_show(struct config_item *item,
 		char *buf)
 {
-	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
+	struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base;
 
 	u32 address = readl(&app_reg->pim0_mem_addr_start);
 
 	return sprintf(buf, "%x", address);
 }
 
-static ssize_t pcie_gadget_store_bar0_address(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_address_store(struct config_item *item,
 		const char *buf, size_t count)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	struct pcie_app_reg __iomem *app_reg = config->va_app_base;
 	ulong address;
 	int ret;
@@ -524,15 +515,13 @@ static ssize_t pcie_gadget_store_bar0_address(
 	return count;
 }
 
-static ssize_t pcie_gadget_show_bar0_rw_offset(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item,
 		char *buf)
 {
-	return sprintf(buf, "%lx", config->bar0_rw_offset);
+	return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset);
 }
 
-static ssize_t pcie_gadget_store_bar0_rw_offset(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item,
 		const char *buf, size_t count)
 {
 	ulong offset;
@@ -545,15 +534,14 @@ static ssize_t pcie_gadget_store_bar0_rw_offset(
 	if (offset % 4)
 		return -EINVAL;
 
-	config->bar0_rw_offset = offset;
+	to_target(item)->bar0_rw_offset = offset;
 
 	return count;
 }
 
-static ssize_t pcie_gadget_show_bar0_data(
-		struct spear_pcie_gadget_config *config,
-		char *buf)
+static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	ulong data;
 
 	if (!config->va_bar0_address)
@@ -564,10 +552,10 @@ static ssize_t pcie_gadget_show_bar0_data(
 	return sprintf(buf, "%lx", data);
 }
 
-static ssize_t pcie_gadget_store_bar0_data(
-		struct spear_pcie_gadget_config *config,
+static ssize_t pcie_gadget_bar0_data_store(struct config_item *item,
 		const char *buf, size_t count)
 {
+	struct spear_pcie_gadget_config *config = to_target(item)
 	ulong data;
 	int ret;
 
@@ -583,97 +571,35 @@ static ssize_t pcie_gadget_store_bar0_data(
 	return count;
 }
 
-/*
- * Attribute definitions.
- */
-
-#define PCIE_GADGET_TARGET_ATTR_RO(_name)				\
-static struct pcie_gadget_target_attr pcie_gadget_target_##_name =	\
-	__CONFIGFS_ATTR(_name, S_IRUGO, pcie_gadget_show_##_name, NULL)
-
-#define PCIE_GADGET_TARGET_ATTR_WO(_name)				\
-static struct pcie_gadget_target_attr pcie_gadget_target_##_name =	\
-	__CONFIGFS_ATTR(_name, S_IWUSR, NULL, pcie_gadget_store_##_name)
-
-#define PCIE_GADGET_TARGET_ATTR_RW(_name)				\
-static struct pcie_gadget_target_attr pcie_gadget_target_##_name =	\
-	__CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, pcie_gadget_show_##_name, \
-			pcie_gadget_store_##_name)
-PCIE_GADGET_TARGET_ATTR_RW(link);
-PCIE_GADGET_TARGET_ATTR_RW(int_type);
-PCIE_GADGET_TARGET_ATTR_RW(no_of_msi);
-PCIE_GADGET_TARGET_ATTR_WO(inta);
-PCIE_GADGET_TARGET_ATTR_WO(send_msi);
-PCIE_GADGET_TARGET_ATTR_RW(vendor_id);
-PCIE_GADGET_TARGET_ATTR_RW(device_id);
-PCIE_GADGET_TARGET_ATTR_RW(bar0_size);
-PCIE_GADGET_TARGET_ATTR_RW(bar0_address);
-PCIE_GADGET_TARGET_ATTR_RW(bar0_rw_offset);
-PCIE_GADGET_TARGET_ATTR_RW(bar0_data);
+CONFIGFS_ATTR(pcie_gadget_, link);
+CONFIGFS_ATTR(pcie_gadget_, int_type);
+CONFIGFS_ATTR(pcie_gadget_, no_of_msi);
+CONFIGFS_ATTR_WO(pcie_gadget_, inta);
+CONFIGFS_ATTR_WO(pcie_gadget_, send_msi);
+CONFIGFS_ATTR(pcie_gadget_, vendor_id);
+CONFIGFS_ATTR(pcie_gadget_, device_id);
+CONFIGFS_ATTR(pcie_gadget_, bar0_size);
+CONFIGFS_ATTR(pcie_gadget_, bar0_address);
+CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset);
+CONFIGFS_ATTR(pcie_gadget_, bar0_data);
 
 static struct configfs_attribute *pcie_gadget_target_attrs[] = {
-	&pcie_gadget_target_link.attr,
-	&pcie_gadget_target_int_type.attr,
-	&pcie_gadget_target_no_of_msi.attr,
-	&pcie_gadget_target_inta.attr,
-	&pcie_gadget_target_send_msi.attr,
-	&pcie_gadget_target_vendor_id.attr,
-	&pcie_gadget_target_device_id.attr,
-	&pcie_gadget_target_bar0_size.attr,
-	&pcie_gadget_target_bar0_address.attr,
-	&pcie_gadget_target_bar0_rw_offset.attr,
-	&pcie_gadget_target_bar0_data.attr,
+	&pcie_gadget_attr_link,
+	&pcie_gadget_attr_int_type,
+	&pcie_gadget_attr_no_of_msi,
+	&pcie_gadget_attr_inta,
+	&pcie_gadget_attr_send_msi,
+	&pcie_gadget_attr_vendor_id,
+	&pcie_gadget_attr_device_id,
+	&pcie_gadget_attr_bar0_size,
+	&pcie_gadget_attr_bar0_address,
+	&pcie_gadget_attr_bar0_rw_offset,
+	&pcie_gadget_attr_bar0_data,
 	NULL,
 };
 
-static struct pcie_gadget_target *to_target(struct config_item *item)
-{
-	return item ?
-		container_of(to_configfs_subsystem(to_config_group(item)),
-				struct pcie_gadget_target, subsys) : NULL;
-}
-
-/*
- * Item operations and type for pcie_gadget_target.
- */
-
-static ssize_t pcie_gadget_target_attr_show(struct config_item *item,
-					   struct configfs_attribute *attr,
-					   char *buf)
-{
-	ssize_t ret = -EINVAL;
-	struct pcie_gadget_target *target = to_target(item);
-	struct pcie_gadget_target_attr *t_attr =
-		container_of(attr, struct pcie_gadget_target_attr, attr);
-
-	if (t_attr->show)
-		ret = t_attr->show(&target->config, buf);
-	return ret;
-}
-
-static ssize_t pcie_gadget_target_attr_store(struct config_item *item,
-					struct configfs_attribute *attr,
-					const char *buf,
-					size_t count)
-{
-	ssize_t ret = -EINVAL;
-	struct pcie_gadget_target *target = to_target(item);
-	struct pcie_gadget_target_attr *t_attr =
-		container_of(attr, struct pcie_gadget_target_attr, attr);
-
-	if (t_attr->store)
-		ret = t_attr->store(&target->config, buf, count);
-	return ret;
-}
-
-static struct configfs_item_operations pcie_gadget_target_item_ops = {
-	.show_attribute		= pcie_gadget_target_attr_show,
-	.store_attribute	= pcie_gadget_target_attr_store,
-};
-
 static struct config_item_type pcie_gadget_target_type = {
 	.ct_attrs		= pcie_gadget_target_attrs,
-	.ct_item_ops		= &pcie_gadget_target_item_ops,
 	.ct_owner		= THIS_MODULE,
 };
 
diff --git a/kernel/drivers/misc/sram.c b/kernel/drivers/misc/sram.c
index eeaaf5fca..736dae715 100644
--- a/kernel/drivers/misc/sram.c
+++ b/kernel/drivers/misc/sram.c
@@ -18,33 +18,154 @@
  * MA 02110-1301, USA.
  */
 
-#include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/clk.h>
-#include <linux/err.h>
+#include <linux/genalloc.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/list.h>
 #include <linux/list_sort.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/genalloc.h>
 
 #define SRAM_GRANULARITY	32
 
+struct sram_partition {
+	void __iomem *base;
+
+	struct gen_pool *pool;
+	struct bin_attribute battr;
+	struct mutex lock;
+};
+
 struct sram_dev {
+	struct device *dev;
+	void __iomem *virt_base;
+
 	struct gen_pool *pool;
 	struct clk *clk;
+
+	struct sram_partition *partition;
+	u32 partitions;
 };
 
 struct sram_reserve {
 	struct list_head list;
 	u32 start;
 	u32 size;
+	bool export;
+	bool pool;
+	const char *label;
 };
 
+static ssize_t sram_read(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *attr,
+			 char *buf, loff_t pos, size_t count)
+{
+	struct sram_partition *part;
+
+	part = container_of(attr, struct sram_partition, battr);
+
+	mutex_lock(&part->lock);
+	memcpy_fromio(buf, part->base + pos, count);
+	mutex_unlock(&part->lock);
+
+	return count;
+}
+
+static ssize_t sram_write(struct file *filp, struct kobject *kobj,
+			  struct bin_attribute *attr,
+			  char *buf, loff_t pos, size_t count)
+{
+	struct sram_partition *part;
+
+	part = container_of(attr, struct sram_partition, battr);
+
+	mutex_lock(&part->lock);
+	memcpy_toio(part->base + pos, buf, count);
+	mutex_unlock(&part->lock);
+
+	return count;
+}
+
+static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block,
+			 phys_addr_t start, struct sram_partition *part)
+{
+	int ret;
+
+	part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+					  NUMA_NO_NODE, block->label);
+	if (IS_ERR(part->pool))
+		return PTR_ERR(part->pool);
+
+	ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start,
+				block->size, NUMA_NO_NODE);
+	if (ret < 0) {
+		dev_err(sram->dev, "failed to register subpool: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block,
+			   phys_addr_t start, struct sram_partition *part)
+{
+	sysfs_bin_attr_init(&part->battr);
+	part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL,
+					       "%llx.sram",
+					       (unsigned long long)start);
+	if (!part->battr.attr.name)
+		return -ENOMEM;
+
+	part->battr.attr.mode = S_IRUSR | S_IWUSR;
+	part->battr.read = sram_read;
+	part->battr.write = sram_write;
+	part->battr.size = block->size;
+
+	return device_create_bin_file(sram->dev, &part->battr);
+}
+
+static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
+			      phys_addr_t start)
+{
+	int ret;
+	struct sram_partition *part = &sram->partition[sram->partitions];
+
+	mutex_init(&part->lock);
+	part->base = sram->virt_base + block->start;
+
+	if (block->pool) {
+		ret = sram_add_pool(sram, block, start, part);
+		if (ret)
+			return ret;
+	}
+	if (block->export) {
+		ret = sram_add_export(sram, block, start, part);
+		if (ret)
+			return ret;
+	}
+	sram->partitions++;
+
+	return 0;
+}
+
+static void sram_free_partitions(struct sram_dev *sram)
+{
+	struct sram_partition *part;
+
+	if (!sram->partitions)
+		return;
+
+	part = &sram->partition[sram->partitions - 1];
+	for (; sram->partitions; sram->partitions--, part--) {
+		if (part->battr.size)
+			device_remove_bin_file(sram->dev, &part->battr);
+
+		if (part->pool &&
+		    gen_pool_avail(part->pool) < gen_pool_size(part->pool))
+			dev_err(sram->dev, "removed pool while SRAM allocated\n");
+	}
+}
+
 static int sram_reserve_cmp(void *priv, struct list_head *a,
 					struct list_head *b)
 {
@@ -54,62 +175,28 @@ static int sram_reserve_cmp(void *priv, struct list_head *a,
 	return ra->start - rb->start;
 }
 
-static int sram_probe(struct platform_device *pdev)
+static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
 {
-	void __iomem *virt_base;
-	struct sram_dev *sram;
-	struct resource *res;
-	struct device_node *np = pdev->dev.of_node, *child;
+	struct device_node *np = sram->dev->of_node, *child;
 	unsigned long size, cur_start, cur_size;
 	struct sram_reserve *rblocks, *block;
 	struct list_head reserve_list;
-	unsigned int nblocks;
-	int ret;
+	unsigned int nblocks, exports = 0;
+	const char *label;
+	int ret = 0;
 
 	INIT_LIST_HEAD(&reserve_list);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "found no memory resource\n");
-		return -EINVAL;
-	}
-
 	size = resource_size(res);
 
-	if (!devm_request_mem_region(&pdev->dev,
-			res->start, size, pdev->name)) {
-		dev_err(&pdev->dev, "could not request region for resource\n");
-		return -EBUSY;
-	}
-
-	virt_base = devm_ioremap_wc(&pdev->dev, res->start, size);
-	if (IS_ERR(virt_base))
-		return PTR_ERR(virt_base);
-
-	sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL);
-	if (!sram)
-		return -ENOMEM;
-
-	sram->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(sram->clk))
-		sram->clk = NULL;
-	else
-		clk_prepare_enable(sram->clk);
-
-	sram->pool = devm_gen_pool_create(&pdev->dev, ilog2(SRAM_GRANULARITY), -1);
-	if (!sram->pool)
-		return -ENOMEM;
-
 	/*
 	 * We need an additional block to mark the end of the memory region
 	 * after the reserved blocks from the dt are processed.
 	 */
 	nblocks = (np) ? of_get_available_child_count(np) + 1 : 1;
-	rblocks = kmalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
-	if (!rblocks) {
-		ret = -ENOMEM;
-		goto err_alloc;
-	}
+	rblocks = kzalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
+	if (!rblocks)
+		return -ENOMEM;
 
 	block = &rblocks[0];
 	for_each_available_child_of_node(np, child) {
@@ -117,14 +204,14 @@ static int sram_probe(struct platform_device *pdev)
 
 		ret = of_address_to_resource(child, 0, &child_res);
 		if (ret < 0) {
-			dev_err(&pdev->dev,
+			dev_err(sram->dev,
 				"could not get address for node %s\n",
 				child->full_name);
 			goto err_chunks;
 		}
 
 		if (child_res.start < res->start || child_res.end > res->end) {
-			dev_err(&pdev->dev,
+			dev_err(sram->dev,
 				"reserved block %s outside the sram area\n",
 				child->full_name);
 			ret = -EINVAL;
@@ -135,12 +222,42 @@ static int sram_probe(struct platform_device *pdev)
 		block->size = resource_size(&child_res);
 		list_add_tail(&block->list, &reserve_list);
 
-		dev_dbg(&pdev->dev, "found reserved block 0x%x-0x%x\n",
-			block->start,
-			block->start + block->size);
+		if (of_find_property(child, "export", NULL))
+			block->export = true;
+
+		if (of_find_property(child, "pool", NULL))
+			block->pool = true;
+
+		if ((block->export || block->pool) && block->size) {
+			exports++;
+
+			label = NULL;
+			ret = of_property_read_string(child, "label", &label);
+			if (ret && ret != -EINVAL) {
+				dev_err(sram->dev,
+					"%s has invalid label name\n",
+					child->full_name);
+				goto err_chunks;
+			}
+			if (!label)
+				label = child->name;
+
+			block->label = devm_kstrdup(sram->dev,
+						    label, GFP_KERNEL);
+			ret = block->label ? 0 : -ENOMEM;
+			if (ret)
+				goto err_chunks;
+			dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n",
+				block->export ? "exported " : "", block->label,
+				block->start, block->start + block->size);
+		} else {
+			dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
+				block->start, block->start + block->size);
+		}
 
 		block++;
 	}
+	child = NULL;
 
 	/* the last chunk marks the end of the region */
 	rblocks[nblocks - 1].start = size;
@@ -149,18 +266,37 @@ static int sram_probe(struct platform_device *pdev)
 
 	list_sort(NULL, &reserve_list, sram_reserve_cmp);
 
-	cur_start = 0;
+	if (exports) {
+		sram->partition = devm_kzalloc(sram->dev,
+				       exports * sizeof(*sram->partition),
+				       GFP_KERNEL);
+		if (!sram->partition) {
+			ret = -ENOMEM;
+			goto err_chunks;
+		}
+	}
 
+	cur_start = 0;
 	list_for_each_entry(block, &reserve_list, list) {
 		/* can only happen if sections overlap */
 		if (block->start < cur_start) {
-			dev_err(&pdev->dev,
+			dev_err(sram->dev,
 				"block at 0x%x starts after current offset 0x%lx\n",
 				block->start, cur_start);
 			ret = -EINVAL;
+			sram_free_partitions(sram);
 			goto err_chunks;
 		}
 
+		if ((block->export || block->pool) && block->size) {
+			ret = sram_add_partition(sram, block,
+						 res->start + block->start);
+			if (ret) {
+				sram_free_partitions(sram);
+				goto err_chunks;
+			}
+		}
+
 		/* current start is in a reserved block, so continue after it */
 		if (block->start == cur_start) {
 			cur_start = block->start + block->size;
@@ -174,40 +310,91 @@ static int sram_probe(struct platform_device *pdev)
 		 */
 		cur_size = block->start - cur_start;
 
-		dev_dbg(&pdev->dev, "adding chunk 0x%lx-0x%lx\n",
+		dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n",
 			cur_start, cur_start + cur_size);
+
 		ret = gen_pool_add_virt(sram->pool,
-				(unsigned long)virt_base + cur_start,
+				(unsigned long)sram->virt_base + cur_start,
 				res->start + cur_start, cur_size, -1);
-		if (ret < 0)
+		if (ret < 0) {
+			sram_free_partitions(sram);
 			goto err_chunks;
+		}
 
 		/* next allocation after this reserved block */
 		cur_start = block->start + block->size;
 	}
 
+ err_chunks:
+	if (child)
+		of_node_put(child);
+
 	kfree(rblocks);
 
+	return ret;
+}
+
+static int sram_probe(struct platform_device *pdev)
+{
+	struct sram_dev *sram;
+	struct resource *res;
+	size_t size;
+	int ret;
+
+	sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL);
+	if (!sram)
+		return -ENOMEM;
+
+	sram->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(sram->dev, "found no memory resource\n");
+		return -EINVAL;
+	}
+
+	size = resource_size(res);
+
+	if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) {
+		dev_err(sram->dev, "could not request region for resource\n");
+		return -EBUSY;
+	}
+
+	sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
+	if (IS_ERR(sram->virt_base))
+		return PTR_ERR(sram->virt_base);
+
+	sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
+					  NUMA_NO_NODE, NULL);
+	if (IS_ERR(sram->pool))
+		return PTR_ERR(sram->pool);
+
+	ret = sram_reserve_regions(sram, res);
+	if (ret)
+		return ret;
+
+	sram->clk = devm_clk_get(sram->dev, NULL);
+	if (IS_ERR(sram->clk))
+		sram->clk = NULL;
+	else
+		clk_prepare_enable(sram->clk);
+
 	platform_set_drvdata(pdev, sram);
 
-	dev_dbg(&pdev->dev, "SRAM pool: %ld KiB @ 0x%p\n", size / 1024, virt_base);
+	dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n",
+		gen_pool_size(sram->pool) / 1024, sram->virt_base);
 
 	return 0;
-
-err_chunks:
-	kfree(rblocks);
-err_alloc:
-	if (sram->clk)
-		clk_disable_unprepare(sram->clk);
-	return ret;
 }
 
 static int sram_remove(struct platform_device *pdev)
 {
 	struct sram_dev *sram = platform_get_drvdata(pdev);
 
+	sram_free_partitions(sram);
+
 	if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
-		dev_dbg(&pdev->dev, "removed while SRAM allocated\n");
+		dev_err(sram->dev, "removed while SRAM allocated\n");
 
 	if (sram->clk)
 		clk_disable_unprepare(sram->clk);
diff --git a/kernel/drivers/misc/ti-st/st_core.c b/kernel/drivers/misc/ti-st/st_core.c
index c8c6a3630..6e3af8b42 100644
--- a/kernel/drivers/misc/ti-st/st_core.c
+++ b/kernel/drivers/misc/ti-st/st_core.c
@@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb)
  * - TTY layer when write's finished
  * - st_write (in context of the protocol stack)
  */
+static void work_fn_write_wakeup(struct work_struct *work)
+{
+	struct st_data_s *st_gdata = container_of(work, struct st_data_s,
+			work_write_wakeup);
+
+	st_tx_wakeup((void *)st_gdata);
+}
 void st_tx_wakeup(struct st_data_s *st_data)
 {
 	struct sk_buff *skb;
@@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty)
 	/* don't do an wakeup for now */
 	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 
-	/* call our internal wakeup */
-	st_tx_wakeup((void *)st_gdata);
+	/*
+	 * schedule the internal wakeup instead of calling directly to
+	 * avoid lockup (port->lock needed in tty->ops->write is
+	 * already taken here)
+	 */
+	schedule_work(&st_gdata->work_write_wakeup);
 }
 
 static void st_tty_flush_buffer(struct tty_struct *tty)
@@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data)
 			pr_err("unable to un-register ldisc");
 		return err;
 	}
+
+	INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup);
+
 	*core_data = st_gdata;
 	return 0;
 }
diff --git a/kernel/drivers/misc/ti-st/st_kim.c b/kernel/drivers/misc/ti-st/st_kim.c
index 18e7a0398..71b64550b 100644
--- a/kernel/drivers/misc/ti-st/st_kim.c
+++ b/kernel/drivers/misc/ti-st/st_kim.c
@@ -36,8 +36,6 @@
 #include <linux/skbuff.h>
 #include <linux/ti_wilink_st.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 
 #define MAX_ST_DEVICES	3	/* Imagine 1 on each UART for now */
 static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
@@ -45,9 +43,6 @@ static struct platform_device *st_kim_devices[MAX_ST_DEVICES];
 /**********************************************************************/
 /* internal functions */
 
-struct ti_st_plat_data	*dt_pdata;
-static struct ti_st_plat_data *get_platform_data(struct device *dev);
-
 /**
  * st_get_plat_device -
  *	function which returns the reference to the platform device
@@ -469,12 +464,7 @@ long st_kim_start(void *kim_data)
 	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
 
 	pr_info(" %s", __func__);
-	if (kim_gdata->kim_pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = kim_gdata->kim_pdev->dev.platform_data;
-	}
+	pdata = kim_gdata->kim_pdev->dev.platform_data;
 
 	do {
 		/* platform specific enabling code here */
@@ -482,9 +472,9 @@ long st_kim_start(void *kim_data)
 			pdata->chip_enable(kim_gdata);
 
 		/* Configure BT nShutdown to HIGH state */
-		gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+		gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
 		mdelay(5);	/* FIXME: a proper toggle */
-		gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH);
+		gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
 		mdelay(100);
 		/* re-initialize the completion */
 		reinit_completion(&kim_gdata->ldisc_installed);
@@ -534,18 +524,12 @@ long st_kim_stop(void *kim_data)
 {
 	long err = 0;
 	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
-	struct ti_st_plat_data	*pdata;
+	struct ti_st_plat_data	*pdata =
+		kim_gdata->kim_pdev->dev.platform_data;
 	struct tty_struct	*tty = kim_gdata->core_data->tty;
 
 	reinit_completion(&kim_gdata->ldisc_installed);
 
-	if (kim_gdata->kim_pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else
-		pdata = kim_gdata->kim_pdev->dev.platform_data;
-
-
 	if (tty) {	/* can be called before ldisc is installed */
 		/* Flush any pending characters in the driver and discipline. */
 		tty_ldisc_flush(tty);
@@ -566,11 +550,11 @@ long st_kim_stop(void *kim_data)
 	}
 
 	/* By default configure BT nShutdown to LOW state */
-	gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
 	mdelay(1);
-	gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH);
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH);
 	mdelay(1);
-	gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+	gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW);
 
 	/* platform specific disable */
 	if (pdata->chip_disable)
@@ -737,53 +721,13 @@ static const struct file_operations list_debugfs_fops = {
  * board-*.c file
  */
 
-static const struct of_device_id kim_of_match[] = {
-{
-	.compatible = "kim",
-	},
-	{}
-};
-MODULE_DEVICE_TABLE(of, kim_of_match);
-
-static struct ti_st_plat_data *get_platform_data(struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	const u32 *dt_property;
-	int len;
-
-	dt_pdata = kzalloc(sizeof(*dt_pdata), GFP_KERNEL);
-
-	if (!dt_pdata)
-		pr_err("Can't allocate device_tree platform data\n");
-
-	dt_property = of_get_property(np, "dev_name", &len);
-	if (dt_property)
-		memcpy(&dt_pdata->dev_name, dt_property, len);
-	of_property_read_u32(np, "nshutdown_gpio",
-			     &dt_pdata->nshutdown_gpio);
-	of_property_read_u32(np, "flow_cntrl", &dt_pdata->flow_cntrl);
-	of_property_read_u32(np, "baud_rate", &dt_pdata->baud_rate);
-
-	return dt_pdata;
-}
-
 static struct dentry *kim_debugfs_dir;
 static int kim_probe(struct platform_device *pdev)
 {
 	struct kim_data_s	*kim_gdata;
-	struct ti_st_plat_data	*pdata;
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
 	int err;
 
-	if (pdev->dev.of_node)
-		pdata = get_platform_data(&pdev->dev);
-	else
-		pdata = pdev->dev.platform_data;
-
-	if (pdata == NULL) {
-		dev_err(&pdev->dev, "Platform Data is missing\n");
-		return -ENXIO;
-	}
-
 	if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) {
 		/* multiple devices could exist */
 		st_kim_devices[pdev->id] = pdev;
@@ -864,16 +808,9 @@ err_core_init:
 static int kim_remove(struct platform_device *pdev)
 {
 	/* free the GPIOs requested */
-	struct ti_st_plat_data	*pdata;
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
 	struct kim_data_s	*kim_gdata;
 
-	if (pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = pdev->dev.platform_data;
-	}
-
 	kim_gdata = platform_get_drvdata(pdev);
 
 	/* Free the Bluetooth/FM/GPIO
@@ -891,22 +828,12 @@ static int kim_remove(struct platform_device *pdev)
 
 	kfree(kim_gdata);
 	kim_gdata = NULL;
-	kfree(dt_pdata);
-	dt_pdata = NULL;
-
 	return 0;
 }
 
 static int kim_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	struct ti_st_plat_data	*pdata;
-
-	if (pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = pdev->dev.platform_data;
-	}
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
 
 	if (pdata->suspend)
 		return pdata->suspend(pdev, state);
@@ -916,14 +843,7 @@ static int kim_suspend(struct platform_device *pdev, pm_message_t state)
 
 static int kim_resume(struct platform_device *pdev)
 {
-	struct ti_st_plat_data	*pdata;
-
-	if (pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = pdev->dev.platform_data;
-	}
+	struct ti_st_plat_data	*pdata = pdev->dev.platform_data;
 
 	if (pdata->resume)
 		return pdata->resume(pdev);
@@ -940,8 +860,6 @@ static struct platform_driver kim_platform_driver = {
 	.resume = kim_resume,
 	.driver = {
 		.name = "kim",
-		.owner = THIS_MODULE,
-		.of_match_table = of_match_ptr(kim_of_match),
 	},
 };
 
diff --git a/kernel/drivers/misc/ti-st/st_ll.c b/kernel/drivers/misc/ti-st/st_ll.c
index 518e1b7f2..93b4d67cc 100644
--- a/kernel/drivers/misc/ti-st/st_ll.c
+++ b/kernel/drivers/misc/ti-st/st_ll.c
@@ -26,7 +26,6 @@
 #include <linux/ti_wilink_st.h>
 
 /**********************************************************************/
-
 /* internal functions */
 static void send_ll_cmd(struct st_data_s *st_data,
 	unsigned char cmd)
@@ -54,13 +53,7 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data)
 
 	/* communicate to platform about chip asleep */
 	kim_data = st_data->kim_data;
-	if (kim_data->kim_pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = kim_data->kim_pdev->dev.platform_data;
-	}
-
+	pdata = kim_data->kim_pdev->dev.platform_data;
 	if (pdata->chip_asleep)
 		pdata->chip_asleep(NULL);
 }
@@ -93,13 +86,7 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data)
 
 	/* communicate to platform about chip wakeup */
 	kim_data = st_data->kim_data;
-	if (kim_data->kim_pdev->dev.of_node) {
-		pr_debug("use device tree data");
-		pdata = dt_pdata;
-	} else {
-		pdata = kim_data->kim_pdev->dev.platform_data;
-	}
-
+	pdata = kim_data->kim_pdev->dev.platform_data;
 	if (pdata->chip_awake)
 		pdata->chip_awake(NULL);
 }
diff --git a/kernel/drivers/misc/ti_dac7512.c b/kernel/drivers/misc/ti_dac7512.c
index cb0289b44..f5456fb7d 100644
--- a/kernel/drivers/misc/ti_dac7512.c
+++ b/kernel/drivers/misc/ti_dac7512.c
@@ -89,7 +89,6 @@ MODULE_DEVICE_TABLE(of, dac7512_of_match);
 static struct spi_driver dac7512_driver = {
 	.driver = {
 		.name	= "dac7512",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(dac7512_of_match),
 	},
 	.probe	= dac7512_probe,
diff --git a/kernel/drivers/misc/tsl2550.c b/kernel/drivers/misc/tsl2550.c
index b00335652..87a13374f 100644
--- a/kernel/drivers/misc/tsl2550.c
+++ b/kernel/drivers/misc/tsl2550.c
@@ -446,7 +446,6 @@ MODULE_DEVICE_TABLE(i2c, tsl2550_id);
 static struct i2c_driver tsl2550_driver = {
 	.driver = {
 		.name	= TSL2550_DRV_NAME,
-		.owner	= THIS_MODULE,
 		.pm	= TSL2550_PM_OPS,
 	},
 	.probe	= tsl2550_probe,
diff --git a/kernel/drivers/misc/vmw_balloon.c b/kernel/drivers/misc/vmw_balloon.c
index 191617492..1e688bfec 100644
--- a/kernel/drivers/misc/vmw_balloon.c
+++ b/kernel/drivers/misc/vmw_balloon.c
@@ -1,7 +1,7 @@
 /*
  * VMware Balloon driver.
  *
- * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -37,16 +37,19 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
 #include <asm/hypervisor.h>
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
-MODULE_VERSION("1.2.1.3-k");
+MODULE_VERSION("1.5.0.0-k");
 MODULE_ALIAS("dmi:*:svnVMware*:*");
 MODULE_ALIAS("vmware_vmmemctl");
 MODULE_LICENSE("GPL");
@@ -57,12 +60,6 @@ MODULE_LICENSE("GPL");
  */
 
 /*
- * Rate of allocating memory when there is no memory pressure
- * (driver performs non-sleeping allocations).
- */
-#define VMW_BALLOON_NOSLEEP_ALLOC_MAX	16384U
-
-/*
  * Rates of memory allocaton when guest experiences memory pressure
  * (driver performs sleeping allocations).
  */
@@ -71,13 +68,6 @@ MODULE_LICENSE("GPL");
 #define VMW_BALLOON_RATE_ALLOC_INC	16U
 
 /*
- * Rates for releasing pages while deflating balloon.
- */
-#define VMW_BALLOON_RATE_FREE_MIN	512U
-#define VMW_BALLOON_RATE_FREE_MAX	16384U
-#define VMW_BALLOON_RATE_FREE_INC	16U
-
-/*
  * When guest is under memory pressure, use a reduced page allocation
  * rate for next several cycles.
  */
@@ -85,7 +75,7 @@ MODULE_LICENSE("GPL");
 
 /*
  * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
- * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
+ * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
  * __GFP_NOWARN, to suppress page allocation failure warnings.
  */
 #define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)
@@ -99,9 +89,6 @@ MODULE_LICENSE("GPL");
  */
 #define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)
 
-/* Maximum number of page allocations without yielding processor */
-#define VMW_BALLOON_YIELD_THRESHOLD	1024
-
 /* Maximum number of refused pages we accumulate during inflation cycle */
 #define VMW_BALLOON_MAX_REFUSED		16
 
@@ -110,68 +97,154 @@ MODULE_LICENSE("GPL");
  */
 #define VMW_BALLOON_HV_PORT		0x5670
 #define VMW_BALLOON_HV_MAGIC		0x456c6d6f
-#define VMW_BALLOON_PROTOCOL_VERSION	2
 #define VMW_BALLOON_GUEST_ID		1	/* Linux */
 
-#define VMW_BALLOON_CMD_START		0
-#define VMW_BALLOON_CMD_GET_TARGET	1
-#define VMW_BALLOON_CMD_LOCK		2
-#define VMW_BALLOON_CMD_UNLOCK		3
-#define VMW_BALLOON_CMD_GUEST_ID	4
+enum vmwballoon_capabilities {
+	/*
+	 * Bit 0 is reserved and not associated to any capability.
+	 */
+	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
+	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
+	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
+	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
+};
+
+#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
+					| VMW_BALLOON_BATCHED_CMDS \
+					| VMW_BALLOON_BATCHED_2M_CMDS \
+					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
+
+#define VMW_BALLOON_2M_SHIFT		(9)
+#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
+
+/*
+ * Backdoor commands availability:
+ *
+ * START, GET_TARGET and GUEST_ID are always available,
+ *
+ * VMW_BALLOON_BASIC_CMDS:
+ *	LOCK and UNLOCK commands,
+ * VMW_BALLOON_BATCHED_CMDS:
+ *	BATCHED_LOCK and BATCHED_UNLOCK commands,
+ * VMW_BALLOON_BATCHED_2M_CMDS:
+ *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
+ * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
+ *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
+ */
+#define VMW_BALLOON_CMD_START			0
+#define VMW_BALLOON_CMD_GET_TARGET		1
+#define VMW_BALLOON_CMD_LOCK			2
+#define VMW_BALLOON_CMD_UNLOCK			3
+#define VMW_BALLOON_CMD_GUEST_ID		4
+#define VMW_BALLOON_CMD_BATCHED_LOCK		6
+#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
+#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
+#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
+#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10
+
 
 /* error codes */
-#define VMW_BALLOON_SUCCESS		0
-#define VMW_BALLOON_FAILURE		-1
-#define VMW_BALLOON_ERROR_CMD_INVALID	1
-#define VMW_BALLOON_ERROR_PPN_INVALID	2
-#define VMW_BALLOON_ERROR_PPN_LOCKED	3
-#define VMW_BALLOON_ERROR_PPN_UNLOCKED	4
-#define VMW_BALLOON_ERROR_PPN_PINNED	5
-#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	6
-#define VMW_BALLOON_ERROR_RESET		7
-#define VMW_BALLOON_ERROR_BUSY		8
-
-#define VMWARE_BALLOON_CMD(cmd, data, result)		\
-({							\
-	unsigned long __stat, __dummy1, __dummy2;	\
-	__asm__ __volatile__ ("inl %%dx" :		\
-		"=a"(__stat),				\
-		"=c"(__dummy1),				\
-		"=d"(__dummy2),				\
-		"=b"(result) :				\
-		"0"(VMW_BALLOON_HV_MAGIC),		\
-		"1"(VMW_BALLOON_CMD_##cmd),		\
-		"2"(VMW_BALLOON_HV_PORT),		\
-		"3"(data) :				\
-		"memory");				\
-	result &= -1UL;					\
-	__stat & -1UL;					\
+#define VMW_BALLOON_SUCCESS		        0
+#define VMW_BALLOON_FAILURE		        -1
+#define VMW_BALLOON_ERROR_CMD_INVALID	        1
+#define VMW_BALLOON_ERROR_PPN_INVALID	        2
+#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
+#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
+#define VMW_BALLOON_ERROR_PPN_PINNED	        5
+#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
+#define VMW_BALLOON_ERROR_RESET		        7
+#define VMW_BALLOON_ERROR_BUSY		        8
+
+#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)
+
+/* Batch page description */
+
+/*
+ * Layout of a page in the batch page:
+ *
+ * +-------------+----------+--------+
+ * |             |          |        |
+ * | Page number | Reserved | Status |
+ * |             |          |        |
+ * +-------------+----------+--------+
+ * 64  PAGE_SHIFT          6         0
+ *
+ * The reserved field should be set to 0.
+ */
+#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
+#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
+#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
+
+struct vmballoon_batch_page {
+	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
+};
+
+static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
+{
+	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
+}
+
+static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
+				int idx)
+{
+	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
+}
+
+static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
+				u64 pa)
+{
+	batch->pages[idx] = pa;
+}
+
+
+#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
+({								\
+	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
+	__asm__ __volatile__ ("inl %%dx" :			\
+		"=a"(__status),					\
+		"=c"(__dummy1),					\
+		"=d"(__dummy2),					\
+		"=b"(result),					\
+		"=S" (__dummy3) :				\
+		"0"(VMW_BALLOON_HV_MAGIC),			\
+		"1"(VMW_BALLOON_CMD_##cmd),			\
+		"2"(VMW_BALLOON_HV_PORT),			\
+		"3"(arg1),					\
+		"4" (arg2) :					\
+		"memory");					\
+	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
+		result = __dummy1;				\
+	result &= -1UL;						\
+	__status & -1UL;					\
 })
 
 #ifdef CONFIG_DEBUG_FS
 struct vmballoon_stats {
 	unsigned int timer;
+	unsigned int doorbell;
 
 	/* allocation statistics */
-	unsigned int alloc;
-	unsigned int alloc_fail;
+	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
 	unsigned int sleep_alloc;
 	unsigned int sleep_alloc_fail;
-	unsigned int refused_alloc;
-	unsigned int refused_free;
-	unsigned int free;
+	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
 
 	/* monitor operations */
-	unsigned int lock;
-	unsigned int lock_fail;
-	unsigned int unlock;
-	unsigned int unlock_fail;
+	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
+	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
 	unsigned int target;
 	unsigned int target_fail;
 	unsigned int start;
 	unsigned int start_fail;
 	unsigned int guest_type;
 	unsigned int guest_type_fail;
+	unsigned int doorbell_set;
+	unsigned int doorbell_unset;
 };
 
 #define STATS_INC(stat) (stat)++
@@ -179,14 +252,30 @@ struct vmballoon_stats {
 #define STATS_INC(stat)
 #endif
 
-struct vmballoon {
+struct vmballoon;
 
+struct vmballoon_ops {
+	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
+	int (*lock)(struct vmballoon *b, unsigned int num_pages,
+			bool is_2m_pages, unsigned int *target);
+	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
+			bool is_2m_pages, unsigned int *target);
+};
+
+struct vmballoon_page_size {
 	/* list of reserved physical pages */
 	struct list_head pages;
 
 	/* transient list of non-balloonable pages */
 	struct list_head refused_pages;
 	unsigned int n_refused_pages;
+};
+
+struct vmballoon {
+	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
+
+	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
+	unsigned supported_page_sizes;
 
 	/* balloon size in pages */
 	unsigned int size;
@@ -197,11 +286,18 @@ struct vmballoon {
 
 	/* adjustment rates (pages per second) */
 	unsigned int rate_alloc;
-	unsigned int rate_free;
 
 	/* slowdown page allocations for next few cycles */
 	unsigned int slow_allocation_cycles;
 
+	unsigned long capabilities;
+
+	struct vmballoon_batch_page *batch_page;
+	unsigned int batch_max_pages;
+	struct page *page;
+
+	const struct vmballoon_ops *ops;
+
 #ifdef CONFIG_DEBUG_FS
 	/* statistics */
 	struct vmballoon_stats stats;
@@ -213,6 +309,8 @@ struct vmballoon {
 	struct sysinfo sysinfo;
 
 	struct delayed_work dwork;
+
+	struct vmci_handle vmci_doorbell;
 };
 
 static struct vmballoon balloon;
@@ -221,19 +319,38 @@ static struct vmballoon balloon;
  * Send "start" command to the host, communicating supported version
  * of the protocol.
  */
-static bool vmballoon_send_start(struct vmballoon *b)
+static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
 {
-	unsigned long status, dummy;
+	unsigned long status, capabilities, dummy = 0;
+	bool success;
 
 	STATS_INC(b->stats.start);
 
-	status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_PROTOCOL_VERSION, dummy);
-	if (status == VMW_BALLOON_SUCCESS)
-		return true;
+	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
 
-	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
-	STATS_INC(b->stats.start_fail);
-	return false;
+	switch (status) {
+	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
+		b->capabilities = capabilities;
+		success = true;
+		break;
+	case VMW_BALLOON_SUCCESS:
+		b->capabilities = VMW_BALLOON_BASIC_CMDS;
+		success = true;
+		break;
+	default:
+		success = false;
+	}
+
+	/* 2m pages are driven by the batched commands only: require both */
+	b->supported_page_sizes =
+		((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
+		 (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) ? 2 : 1;
+
+	if (!success) {
+		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
+		STATS_INC(b->stats.start_fail);
+	}
+	return success;
 }
 
 static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
@@ -259,9 +376,10 @@ static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
  */
 static bool vmballoon_send_guest_id(struct vmballoon *b)
 {
-	unsigned long status, dummy;
+	unsigned long status, dummy = 0;
 
-	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy);
+	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
+				dummy);
 
 	STATS_INC(b->stats.guest_type);
 
@@ -273,6 +391,14 @@ static bool vmballoon_send_guest_id(struct vmballoon *b)
 	return false;
 }
 
+static u16 vmballoon_page_size(bool is_2m_page)
+{
+	/* number of base pages that one balloon page of this kind spans */
+	unsigned int shift = is_2m_page ? VMW_BALLOON_2M_SHIFT : 0;
+
+	return 1 << shift;
+}
+
 /*
  * Retrieve desired balloon size from the host.
  */
@@ -281,6 +407,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
 	unsigned long status;
 	unsigned long target;
 	unsigned long limit;
+	unsigned long dummy = 0;
 	u32 limit32;
 
 	/*
@@ -299,7 +426,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
 	/* update stats */
 	STATS_INC(b->stats.target);
 
-	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target);
+	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
 	if (vmballoon_check_status(b, status)) {
 		*new_target = target;
 		return true;
@@ -316,23 +443,46 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
  * check the return value and maybe submit a different page.
  */
 static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
-				     unsigned int *hv_status)
+				unsigned int *hv_status, unsigned int *target)
 {
-	unsigned long status, dummy;
+	unsigned long status, dummy = 0;
 	u32 pfn32;
 
 	pfn32 = (u32)pfn;
 	if (pfn32 != pfn)
 		return -1;
 
-	STATS_INC(b->stats.lock);
+	STATS_INC(b->stats.lock[false]);
 
-	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy);
+	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
 	if (vmballoon_check_status(b, status))
 		return 0;
 
 	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.lock_fail);
+	STATS_INC(b->stats.lock_fail[false]);
+	return 1;
+}
+
+static int vmballoon_send_batched_lock(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	unsigned long status;
+	unsigned long pfn = page_to_pfn(b->page);
+
+	STATS_INC(b->stats.lock[is_2m_pages]);
+
+	if (is_2m_pages)
+		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
+				*target);
+	else
+		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
+				*target);
+
+	if (vmballoon_check_status(b, status))
+		return 0;
+
+	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.lock_fail[is_2m_pages]);
 	return 1;
 }
 
@@ -340,26 +490,66 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
  * Notify the host that guest intends to release given page back into
  * the pool of available (to the guest) pages.
  */
-static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
+static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
+							unsigned int *target)
 {
-	unsigned long status, dummy;
+	unsigned long status, dummy = 0;
 	u32 pfn32;
 
 	pfn32 = (u32)pfn;
 	if (pfn32 != pfn)
 		return false;
 
-	STATS_INC(b->stats.unlock);
+	STATS_INC(b->stats.unlock[false]);
 
-	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy);
+	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
 	if (vmballoon_check_status(b, status))
 		return true;
 
 	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
-	STATS_INC(b->stats.unlock_fail);
+	STATS_INC(b->stats.unlock_fail[false]);
+	return false;
+}
+
+static bool vmballoon_send_batched_unlock(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	unsigned long status;
+	unsigned long pfn = page_to_pfn(b->page);
+
+	STATS_INC(b->stats.unlock[is_2m_pages]);
+
+	if (is_2m_pages)
+		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
+				*target);
+	else
+		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
+				*target);
+
+	if (vmballoon_check_status(b, status))
+		return true;
+
+	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
+	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
 	return false;
 }
 
+static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
+{
+	/* a 2m page is one allocation of order VMW_BALLOON_2M_SHIFT */
+	unsigned int order = is_2m_page ? VMW_BALLOON_2M_SHIFT : 0;
+
+	return alloc_pages(flags, order);
+}
+
+static void vmballoon_free_page(struct page *page, bool is_2m_page)
+{
+	/* same order selection as in vmballoon_alloc_page() */
+	unsigned int order = is_2m_page ? VMW_BALLOON_2M_SHIFT : 0;
+
+	__free_pages(page, order);
+}
+
 /*
  * Quickly release all pages allocated for the balloon. This function is
  * called when host decides to "reset" balloon for one reason or another.
@@ -369,91 +559,75 @@ static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
 static void vmballoon_pop(struct vmballoon *b)
 {
 	struct page *page, *next;
-	unsigned int count = 0;
-
-	list_for_each_entry_safe(page, next, &b->pages, lru) {
-		list_del(&page->lru);
-		__free_page(page);
-		STATS_INC(b->stats.free);
-		b->size--;
-
-		if (++count >= b->rate_free) {
-			count = 0;
+	unsigned is_2m_pages;
+
+	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+			is_2m_pages++) {
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+		u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+			list_del(&page->lru);
+			vmballoon_free_page(page, is_2m_pages);
+			STATS_INC(b->stats.free[is_2m_pages]);
+			b->size -= size_per_page;
 			cond_resched();
 		}
 	}
-}
 
-/*
- * Perform standard reset sequence by popping the balloon (in case it
- * is not  empty) and then restarting protocol. This operation normally
- * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
- */
-static void vmballoon_reset(struct vmballoon *b)
-{
-	/* free all pages, skipping monitor unlock */
-	vmballoon_pop(b);
+	if (b->batch_page) {
+		vunmap(b->batch_page);
+		b->batch_page = NULL;
+	}
 
-	if (vmballoon_send_start(b)) {
-		b->reset_required = false;
-		if (!vmballoon_send_guest_id(b))
-			pr_err("failed to send guest ID to the host\n");
+	if (b->page) {
+		__free_page(b->page);
+		b->page = NULL;
 	}
 }
 
 /*
- * Allocate (or reserve) a page for the balloon and notify the host.  If host
- * refuses the page put it on "refuse" list and allocate another one until host
- * is satisfied. "Refused" pages are released at the end of inflation cycle
- * (when we allocate b->rate_alloc pages).
+ * Notify the host of a ballooned page. If the host rejects the page, put it
+ * on the refuse list; refused pages are then released at the end of the
+ * inflation cycle.
  */
-static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
+static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
+				bool is_2m_pages, unsigned int *target)
 {
-	struct page *page;
-	gfp_t flags;
-	unsigned int hv_status;
-	int locked;
-	flags = can_sleep ? VMW_PAGE_ALLOC_CANSLEEP : VMW_PAGE_ALLOC_NOSLEEP;
-
-	do {
-		if (!can_sleep)
-			STATS_INC(b->stats.alloc);
-		else
-			STATS_INC(b->stats.sleep_alloc);
+	int locked, hv_status;
+	struct page *page = b->page;
+	struct vmballoon_page_size *page_size = &b->page_sizes[false];
 
-		page = alloc_page(flags);
-		if (!page) {
-			if (!can_sleep)
-				STATS_INC(b->stats.alloc_fail);
-			else
-				STATS_INC(b->stats.sleep_alloc_fail);
-			return -ENOMEM;
-		}
+	/* is_2m_pages can never happen as 2m pages support implies batching */
 
-		/* inform monitor */
-		locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status);
-		if (locked > 0) {
-			STATS_INC(b->stats.refused_alloc);
+	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
+								target);
+	if (locked) {
+		STATS_INC(b->stats.refused_alloc[false]);
 
-			if (hv_status == VMW_BALLOON_ERROR_RESET ||
-			    hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
-				__free_page(page);
-				return -EIO;
-			}
+		if (locked > 0 && (hv_status == VMW_BALLOON_ERROR_RESET ||
+				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
+			vmballoon_free_page(page, false);
+			return -EIO;
+		}
 
-			/*
-			 * Place page on the list of non-balloonable pages
-			 * and retry allocation, unless we already accumulated
-			 * too many of them, in which case take a breather.
-			 */
-			list_add(&page->lru, &b->refused_pages);
-			if (++b->n_refused_pages >= VMW_BALLOON_MAX_REFUSED)
-				return -EIO;
+		/*
+		 * The page was refused by the host, or its pfn does not fit
+		 * the 32-bit LOCK command (locked < 0): keep it on the
+		 * refused list unless too many have already accumulated.
+		 */
+		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
+			page_size->n_refused_pages++;
+			list_add(&page->lru, &page_size->refused_pages);
+		} else {
+			vmballoon_free_page(page, false);
 		}
-	} while (locked != 0);
+		return -EIO;
+	}
 
 	/* track allocated page */
-	list_add(&page->lru, &b->pages);
+	list_add(&page->lru, &page_size->pages);
 
 	/* update balloon size */
 	b->size++;
@@ -461,21 +635,81 @@ static int vmballoon_reserve_page(struct vmballoon *b, bool can_sleep)
 	return 0;
 }
 
+static int vmballoon_lock_batched_page(struct vmballoon *b,
+		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
+{
+	int locked, i;
+	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+
+	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
+			target);
+	if (locked > 0) {
+		for (i = 0; i < num_pages; i++) {
+			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+
+			vmballoon_free_page(p, is_2m_pages);
+		}
+
+		return -EIO;
+	}
+	/* the command succeeded: handle each entry by its own status */
+	for (i = 0; i < num_pages; i++) {
+		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+
+		locked = vmballoon_batch_get_status(b->batch_page, i);
+
+		switch (locked) {
+		case VMW_BALLOON_SUCCESS:
+			list_add(&p->lru, &page_size->pages);
+			b->size += size_per_page;
+			break;
+		case VMW_BALLOON_ERROR_PPN_PINNED:
+		case VMW_BALLOON_ERROR_PPN_INVALID:
+			if (page_size->n_refused_pages
+					< VMW_BALLOON_MAX_REFUSED) {
+				list_add(&p->lru, &page_size->refused_pages);
+				page_size->n_refused_pages++;
+				break;
+			}
+			/* Fallthrough - refused list is full */
+		case VMW_BALLOON_ERROR_RESET:
+		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
+			vmballoon_free_page(p, is_2m_pages);
+			break;
+		default:
+			/* Unknown status: the page is neither freed nor listed */
+			WARN_ON_ONCE(true);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Release the page allocated for the balloon. Note that we first notify
  * the host so it can make sure the page will be available for the guest
  * to use, if needed.
  */
-static int vmballoon_release_page(struct vmballoon *b, struct page *page)
+static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
+		bool is_2m_pages, unsigned int *target)
 {
-	if (!vmballoon_send_unlock_page(b, page_to_pfn(page)))
-		return -EIO;
+	struct page *page = b->page;
+	struct vmballoon_page_size *page_size = &b->page_sizes[false];
 
-	list_del(&page->lru);
+	/* is_2m_pages can never happen as 2m pages support implies batching */
+
+	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
+		list_add(&page->lru, &page_size->pages);
+		return -EIO;
+	}
 
 	/* deallocate page */
-	__free_page(page);
-	STATS_INC(b->stats.free);
+	vmballoon_free_page(page, false);
+	STATS_INC(b->stats.free[false]);
 
 	/* update balloon size */
 	b->size--;
@@ -483,21 +717,76 @@ static int vmballoon_release_page(struct vmballoon *b, struct page *page)
 	return 0;
 }
 
+static int vmballoon_unlock_batched_page(struct vmballoon *b,
+				unsigned int num_pages, bool is_2m_pages,
+				unsigned int *target)
+{
+	struct vmballoon_page_size *page_size =
+			&b->page_sizes[is_2m_pages];
+	u16 size_per_page = vmballoon_page_size(is_2m_pages);
+	bool hv_success;
+	int i;
+
+	/* one command covers all num_pages entries of the batch page */
+	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
+			target);
+
+	for (i = 0; i < num_pages; i++) {
+		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
+		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
+		int status = vmballoon_batch_get_status(b->batch_page, i);
+
+		if (hv_success && status == VMW_BALLOON_SUCCESS) {
+			/* unlocked by the hypervisor: deallocate the page */
+			vmballoon_free_page(p, is_2m_pages);
+			STATS_INC(b->stats.free[is_2m_pages]);
+
+			/* account for it in the balloon size */
+			b->size -= size_per_page;
+			continue;
+		}
+
+		/*
+		 * Either the whole command or this entry failed, so the
+		 * page is still ballooned: put it back on the list of
+		 * pages owned by the balloon driver.
+		 */
+		list_add(&p->lru, &page_size->pages);
+	}
+
+	return hv_success ? 0 : -EIO;
+}
+
 /*
  * Release pages that were allocated while attempting to inflate the
  * balloon but were refused by the host for one reason or another.
  */
-static void vmballoon_release_refused_pages(struct vmballoon *b)
+static void vmballoon_release_refused_pages(struct vmballoon *b,
+		bool is_2m_pages)
 {
 	struct page *page, *next;
+	struct vmballoon_page_size *page_size =
+			&b->page_sizes[is_2m_pages];
 
-	list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
+	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
 		list_del(&page->lru);
-		__free_page(page);
-		STATS_INC(b->stats.refused_free);
+		vmballoon_free_page(page, is_2m_pages);
+		STATS_INC(b->stats.refused_free[is_2m_pages]);
 	}
 
-	b->n_refused_pages = 0;
+	page_size->n_refused_pages = 0;
+}
+
+static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
+{
+	b->page = p;
+}
+
+static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
+				struct page *p)
+{
+	vmballoon_batch_set_pa(b->batch_page, idx,
+			(u64)page_to_pfn(p) << PAGE_SHIFT);
 }
 
 /*
@@ -507,12 +796,12 @@ static void vmballoon_release_refused_pages(struct vmballoon *b)
  */
 static void vmballoon_inflate(struct vmballoon *b)
 {
-	unsigned int goal;
-	unsigned int rate;
-	unsigned int i;
+	unsigned rate;
 	unsigned int allocations = 0;
+	unsigned int num_pages = 0;
 	int error = 0;
-	bool alloc_can_sleep = false;
+	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
+	bool is_2m_pages;
 
 	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
 
@@ -531,31 +820,51 @@ static void vmballoon_inflate(struct vmballoon *b)
 	 * slowdown page allocations considerably.
 	 */
 
-	goal = b->target - b->size;
 	/*
 	 * Start with no sleep allocation rate which may be higher
 	 * than sleeping allocation rate.
 	 */
-	rate = b->slow_allocation_cycles ?
-			b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;
+	if (b->slow_allocation_cycles) {
+		rate = b->rate_alloc;
+		is_2m_pages = false;
+	} else {
+		rate = UINT_MAX;
+		is_2m_pages =
+			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
+	}
+
+	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
+		 __func__, b->target - b->size, rate, b->rate_alloc);
+
+	while (!b->reset_required &&
+		b->size + num_pages * vmballoon_page_size(is_2m_pages)
+		< b->target) {
+		struct page *page;
 
-	pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
-		 __func__, goal, rate, b->rate_alloc);
+		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
+			STATS_INC(b->stats.alloc[is_2m_pages]);
+		else
+			STATS_INC(b->stats.sleep_alloc);
+
+		page = vmballoon_alloc_page(flags, is_2m_pages);
+		if (!page) {
+			STATS_INC(b->stats.alloc_fail[is_2m_pages]);
 
-	for (i = 0; i < goal; i++) {
+			if (is_2m_pages) {
+				b->ops->lock(b, num_pages, true, &b->target);
 
-		error = vmballoon_reserve_page(b, alloc_can_sleep);
-		if (error) {
-			if (error != -ENOMEM) {
 				/*
-				 * Not a page allocation failure, stop this
-				 * cycle. Maybe we'll get new target from
-				 * the host soon.
+				 * ignore errors from locking as we now switch
+				 * to 4k pages and we might get different
+				 * errors.
 				 */
-				break;
+
+				num_pages = 0;
+				is_2m_pages = false;
+				continue;
 			}
 
-			if (alloc_can_sleep) {
+			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
 				/*
 				 * CANSLEEP page allocation failed, so guest
 				 * is under severe memory pressure. Quickly
@@ -563,6 +872,7 @@ static void vmballoon_inflate(struct vmballoon *b)
 				 */
 				b->rate_alloc = max(b->rate_alloc / 2,
 						    VMW_BALLOON_RATE_ALLOC_MIN);
+				STATS_INC(b->stats.sleep_alloc_fail);
 				break;
 			}
 
@@ -576,38 +886,49 @@ static void vmballoon_inflate(struct vmballoon *b)
 			 */
 			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
 
-			if (i >= b->rate_alloc)
+			if (allocations >= b->rate_alloc)
 				break;
 
-			alloc_can_sleep = true;
+			flags = VMW_PAGE_ALLOC_CANSLEEP;
 			/* Lower rate for sleeping allocations. */
 			rate = b->rate_alloc;
+			continue;
 		}
 
-		if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
-			cond_resched();
-			allocations = 0;
+		b->ops->add_page(b, num_pages++, page);
+		if (num_pages == b->batch_max_pages) {
+			error = b->ops->lock(b, num_pages, is_2m_pages,
+					&b->target);
+			num_pages = 0;
+			if (error)
+				break;
 		}
 
-		if (i >= rate) {
+		cond_resched();
+
+		if (allocations >= rate) {
 			/* We allocated enough pages, let's take a break. */
 			break;
 		}
 	}
 
+	if (num_pages > 0)
+		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
+
 	/*
 	 * We reached our goal without failures so try increasing
 	 * allocation rate.
 	 */
-	if (error == 0 && i >= b->rate_alloc) {
-		unsigned int mult = i / b->rate_alloc;
+	if (error == 0 && allocations >= b->rate_alloc) {
+		unsigned int mult = allocations / b->rate_alloc;
 
 		b->rate_alloc =
 			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
 			    VMW_BALLOON_RATE_ALLOC_MAX);
 	}
 
-	vmballoon_release_refused_pages(b);
+	vmballoon_release_refused_pages(b, true);
+	vmballoon_release_refused_pages(b, false);
 }
 
 /*
@@ -615,35 +936,176 @@ static void vmballoon_inflate(struct vmballoon *b)
  */
 static void vmballoon_deflate(struct vmballoon *b)
 {
-	struct page *page, *next;
-	unsigned int i = 0;
-	unsigned int goal;
-	int error;
+	unsigned is_2m_pages;
 
 	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
 
-	/* limit deallocation rate */
-	goal = min(b->size - b->target, b->rate_free);
+	/* free pages to reach target */
+	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
+			is_2m_pages++) {
+		struct page *page, *next;
+		unsigned int num_pages = 0;
+		struct vmballoon_page_size *page_size =
+				&b->page_sizes[is_2m_pages];
+
+		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
+			if (b->reset_required ||
+				(b->target > 0 &&
+					b->size - num_pages
+					* vmballoon_page_size(is_2m_pages)
+				< b->target + vmballoon_page_size(true)))
+				break;
 
-	pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free);
+			list_del(&page->lru);
+			b->ops->add_page(b, num_pages++, page);
 
-	/* free pages to reach target */
-	list_for_each_entry_safe(page, next, &b->pages, lru) {
-		error = vmballoon_release_page(b, page);
-		if (error) {
-			/* quickly decrease rate in case of error */
-			b->rate_free = max(b->rate_free / 2,
-					   VMW_BALLOON_RATE_FREE_MIN);
-			return;
+			if (num_pages == b->batch_max_pages) {
+				int error;
+
+				error = b->ops->unlock(b, num_pages,
+						is_2m_pages, &b->target);
+				num_pages = 0;
+				if (error)
+					return;
+			}
+
+			cond_resched();
 		}
 
-		if (++i >= goal)
-			break;
+		if (num_pages > 0)
+			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
+	}
+}
+
+static const struct vmballoon_ops vmballoon_basic_ops = {
+	.add_page = vmballoon_add_page,
+	.lock = vmballoon_lock_page,
+	.unlock = vmballoon_unlock_page
+};
+
+static const struct vmballoon_ops vmballoon_batched_ops = {
+	.add_page = vmballoon_add_batched_page,
+	.lock = vmballoon_lock_batched_page,
+	.unlock = vmballoon_unlock_batched_page
+};
+
+static bool vmballoon_init_batching(struct vmballoon *b)
+{
+	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
+	if (!b->page)
+		return false;
+	/* map the page so its entries can be accessed via b->batch_page */
+	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
+	if (!b->batch_page) {
+		__free_page(b->page);
+		b->page = NULL;	/* vmballoon_pop() frees b->page when set */
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Receive notification and resize balloon
+ */
+static void vmballoon_doorbell(void *client_data)
+{
+	struct vmballoon *b = client_data;
+
+	STATS_INC(b->stats.doorbell);
+
+	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
+}
+
+/*
+ * Clean up vmci doorbell
+ */
+static void vmballoon_vmci_cleanup(struct vmballoon *b)
+{
+	int error;
+
+	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
+			VMCI_INVALID_ID, error);
+	STATS_INC(b->stats.doorbell_unset);
+
+	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
+		vmci_doorbell_destroy(b->vmci_doorbell);
+		b->vmci_doorbell = VMCI_INVALID_HANDLE;
+	}
+}
+
+/*
+ * Initialize vmci doorbell, to get notified as soon as balloon changes
+ */
+static int vmballoon_vmci_init(struct vmballoon *b)
+{
+	unsigned long status, dummy = 0;
+	int error = 0;
+
+	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
+		error = vmci_doorbell_create(&b->vmci_doorbell,
+				VMCI_FLAG_DELAYED_CB,
+				VMCI_PRIVILEGE_FLAG_RESTRICTED,
+				vmballoon_doorbell, b);
+		if (error == VMCI_SUCCESS) {
+			status = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
+					b->vmci_doorbell.context,
+					b->vmci_doorbell.resource, dummy);
+			STATS_INC(b->stats.doorbell_set);
+			error = (status != VMW_BALLOON_SUCCESS);
+		}
+	}
+	/* fail on a vmci error or when the host rejects the hypercall */
+	if (error != 0) {
+		vmballoon_vmci_cleanup(b);
+		return -EIO;
 	}
 
-	/* slowly increase rate if there were no errors */
-	b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
-			   VMW_BALLOON_RATE_FREE_MAX);
+	return 0;
+}
+
+/*
+ * Perform standard reset sequence by popping the balloon (in case it
+ * is not empty) and then restarting protocol. This operation normally
+ * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
+ */
+static void vmballoon_reset(struct vmballoon *b)
+{
+	int error;
+
+	vmballoon_vmci_cleanup(b);
+
+	/* free all pages, skipping monitor unlock */
+	vmballoon_pop(b);
+
+	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
+		return;
+
+	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
+		b->ops = &vmballoon_batched_ops;
+		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
+		if (!vmballoon_init_batching(b)) {
+			/*
+			 * We failed to initialize batching, inform the monitor
+			 * about it by sending a null capability.
+			 *
+			 * The guest will retry in one second.
+			 */
+			vmballoon_send_start(b, 0);
+			return;
+		}
+	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
+		b->ops = &vmballoon_basic_ops;
+		b->batch_max_pages = 1;
+	}
+
+	b->reset_required = false;
+
+	error = vmballoon_vmci_init(b);
+	if (error)
+		pr_err("failed to initialize vmci doorbell\n");
+
+	if (!vmballoon_send_guest_id(b))
+		pr_err("failed to send guest ID to the host\n");
 }
 
 /*
@@ -664,13 +1126,14 @@ static void vmballoon_work(struct work_struct *work)
 	if (b->slow_allocation_cycles > 0)
 		b->slow_allocation_cycles--;
 
-	if (vmballoon_send_get_target(b, &target)) {
+	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
 		/* update target, adjust size */
 		b->target = target;
 
 		if (b->size < target)
 			vmballoon_inflate(b);
-		else if (b->size > target)
+		else if (target == 0 ||
+				b->size > target + vmballoon_page_size(true))
 			vmballoon_deflate(b);
 	}
 
@@ -692,6 +1155,14 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
 	struct vmballoon *b = f->private;
 	struct vmballoon_stats *stats = &b->stats;
 
+	/* format capabilities info */
+	seq_printf(f,
+		   "balloon capabilities:   %#4x\n"
+		   "used capabilities:      %#4lx\n"
+		   "is resetting:           %c\n",
+		   VMW_BALLOON_CAPABILITIES, b->capabilities,
+		   b->reset_required ? 'y' : 'n');
+
 	/* format size info */
 	seq_printf(f,
 		   "target:             %8d pages\n"
@@ -700,35 +1171,48 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
 
 	/* format rate info */
 	seq_printf(f,
-		   "rateNoSleepAlloc:   %8d pages/sec\n"
-		   "rateSleepAlloc:     %8d pages/sec\n"
-		   "rateFree:           %8d pages/sec\n",
-		   VMW_BALLOON_NOSLEEP_ALLOC_MAX,
-		   b->rate_alloc, b->rate_free);
+		   "rateSleepAlloc:     %8d pages/sec\n",
+		   b->rate_alloc);
 
 	seq_printf(f,
 		   "\n"
 		   "timer:              %8u\n"
+		   "doorbell:           %8u\n"
 		   "start:              %8u (%4u failed)\n"
 		   "guestType:          %8u (%4u failed)\n"
+		   "2m-lock:            %8u (%4u failed)\n"
 		   "lock:               %8u (%4u failed)\n"
+		   "2m-unlock:          %8u (%4u failed)\n"
 		   "unlock:             %8u (%4u failed)\n"
 		   "target:             %8u (%4u failed)\n"
+		   "prim2mAlloc:        %8u (%4u failed)\n"
 		   "primNoSleepAlloc:   %8u (%4u failed)\n"
 		   "primCanSleepAlloc:  %8u (%4u failed)\n"
+		   "prim2mFree:         %8u\n"
 		   "primFree:           %8u\n"
+		   "err2mAlloc:         %8u\n"
 		   "errAlloc:           %8u\n"
-		   "errFree:            %8u\n",
+		   "err2mFree:          %8u\n"
+		   "errFree:            %8u\n"
+		   "doorbellSet:        %8u\n"
+		   "doorbellUnset:      %8u\n",
 		   stats->timer,
+		   stats->doorbell,
 		   stats->start, stats->start_fail,
 		   stats->guest_type, stats->guest_type_fail,
-		   stats->lock,  stats->lock_fail,
-		   stats->unlock, stats->unlock_fail,
+		   stats->lock[true],  stats->lock_fail[true],
+		   stats->lock[false],  stats->lock_fail[false],
+		   stats->unlock[true], stats->unlock_fail[true],
+		   stats->unlock[false], stats->unlock_fail[false],
 		   stats->target, stats->target_fail,
-		   stats->alloc, stats->alloc_fail,
+		   stats->alloc[true], stats->alloc_fail[true],
+		   stats->alloc[false], stats->alloc_fail[false],
 		   stats->sleep_alloc, stats->sleep_alloc_fail,
-		   stats->free,
-		   stats->refused_alloc, stats->refused_free);
+		   stats->free[true],
+		   stats->free[false],
+		   stats->refused_alloc[true], stats->refused_alloc[false],
+		   stats->refused_free[true], stats->refused_free[false],
+		   stats->doorbell_set, stats->doorbell_unset);
 
 	return 0;
 }
@@ -782,7 +1266,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
 static int __init vmballoon_init(void)
 {
 	int error;
-
+	unsigned is_2m_pages;
 	/*
 	 * Check if we are running on VMware's hypervisor and bail out
 	 * if we are not.
@@ -790,32 +1274,26 @@ static int __init vmballoon_init(void)
 	if (x86_hyper != &x86_hyper_vmware)
 		return -ENODEV;
 
-	INIT_LIST_HEAD(&balloon.pages);
-	INIT_LIST_HEAD(&balloon.refused_pages);
+	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
+			is_2m_pages++) {
+		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
+		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
+	}
 
 	/* initialize rates */
 	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
-	balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
 
 	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
 
-	/*
-	 * Start balloon.
-	 */
-	if (!vmballoon_send_start(&balloon)) {
-		pr_err("failed to send start command to the host\n");
-		return -EIO;
-	}
-
-	if (!vmballoon_send_guest_id(&balloon)) {
-		pr_err("failed to send guest ID to the host\n");
-		return -EIO;
-	}
-
 	error = vmballoon_debugfs_init(&balloon);
 	if (error)
 		return error;
 
+	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
+	balloon.batch_page = NULL;
+	balloon.page = NULL;
+	balloon.reset_required = true;
+
 	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
 
 	return 0;
@@ -824,6 +1302,7 @@ module_init(vmballoon_init);
 
 static void __exit vmballoon_exit(void)
 {
+	vmballoon_vmci_cleanup(&balloon);
 	cancel_delayed_work_sync(&balloon.dwork);
 
 	vmballoon_debugfs_exit(&balloon);
@@ -833,7 +1312,7 @@ static void __exit vmballoon_exit(void)
 	 * Reset connection before deallocating memory to avoid potential for
 	 * additional spurious resets from guest touching deallocated pages.
 	 */
-	vmballoon_send_start(&balloon);
+	vmballoon_send_start(&balloon, 0);
 	vmballoon_pop(&balloon);
 }
 module_exit(vmballoon_exit);
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_datagram.c b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c
index 822665245..8a4b6bbe1 100644
--- a/kernel/drivers/misc/vmw_vmci/vmci_datagram.c
+++ b/kernel/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -276,11 +276,10 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
 		}
 
 		/* We make a copy to enqueue. */
-		new_dg = kmalloc(dg_size, GFP_KERNEL);
+		new_dg = kmemdup(dg, dg_size, GFP_KERNEL);
 		if (new_dg == NULL)
 			return VMCI_ERROR_NO_MEM;
 
-		memcpy(new_dg, dg, dg_size);
 		retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
 		if (retval < VMCI_SUCCESS) {
 			kfree(new_dg);
diff --git a/kernel/drivers/misc/vmw_vmci/vmci_host.c b/kernel/drivers/misc/vmw_vmci/vmci_host.c
index a721b5d8a..9ec262a52 100644
--- a/kernel/drivers/misc/vmw_vmci/vmci_host.c
+++ b/kernel/drivers/misc/vmw_vmci/vmci_host.c
@@ -1031,14 +1031,9 @@ int __init vmci_host_init(void)
 
 void __exit vmci_host_exit(void)
 {
-	int error;
-
 	vmci_host_device_initialized = false;
 
-	error = misc_deregister(&vmci_host_miscdev);
-	if (error)
-		pr_warn("Error unregistering character device: %d\n", error);
-
+	misc_deregister(&vmci_host_miscdev);
 	vmci_ctx_destroy(host_context);
 	vmci_qp_broker_exit();
author	José Pekkarinen <jose.pekkarinen@nokia.com>	2016-04-11 10:41:07 +0300
committer	José Pekkarinen <jose.pekkarinen@nokia.com>	2016-04-13 08:17:18 +0300
commit	e09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
tree	d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/misc
parent	f93b97fd65072de626c074dbe099a1fff05ce060 (diff)