summaryrefslogtreecommitdiffstats
path: root/kernel/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/arch/powerpc/platforms')
-rw-r--r--kernel/arch/powerpc/platforms/512x/Kconfig10
-rw-r--r--kernel/arch/powerpc/platforms/512x/Makefile1
-rw-r--r--kernel/arch/powerpc/platforms/512x/clock-commonclk.c1
-rw-r--r--kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c8
-rw-r--r--kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c540
-rw-r--r--kernel/arch/powerpc/platforms/52xx/media5200.c2
-rw-r--r--kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c10
-rw-r--r--kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c1
-rw-r--r--kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c2
-rw-r--r--kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c2
-rw-r--r--kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c2
-rw-r--r--kernel/arch/powerpc/platforms/83xx/suspend.c3
-rw-r--r--kernel/arch/powerpc/platforms/85xx/Kconfig2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/c293pcie.c4
-rw-r--r--kernel/arch/powerpc/platforms/85xx/common.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/corenet_generic.c16
-rw-r--r--kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c5
-rw-r--r--kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/p1022_ds.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/p1022_rdk.c2
-rw-r--r--kernel/arch/powerpc/platforms/85xx/smp.c121
-rw-r--r--kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c3
-rw-r--r--kernel/arch/powerpc/platforms/85xx/twr_p102x.c6
-rw-r--r--kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c2
-rw-r--r--kernel/arch/powerpc/platforms/86xx/pic.c2
-rw-r--r--kernel/arch/powerpc/platforms/8xx/m8xx_setup.c2
-rw-r--r--kernel/arch/powerpc/platforms/Kconfig.cputype22
-rw-r--r--kernel/arch/powerpc/platforms/cell/Kconfig19
-rw-r--r--kernel/arch/powerpc/platforms/cell/axon_msi.c17
-rw-r--r--kernel/arch/powerpc/platforms/cell/interrupt.c6
-rw-r--r--kernel/arch/powerpc/platforms/cell/iommu.c8
-rw-r--r--kernel/arch/powerpc/platforms/cell/ras.c2
-rw-r--r--kernel/arch/powerpc/platforms/cell/spider-pic.c11
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/file.c55
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/inode.c2
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c124
-rw-r--r--kernel/arch/powerpc/platforms/chrp/setup.c2
-rw-r--r--kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c3
-rw-r--r--kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c5
-rw-r--r--kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c2
-rw-r--r--kernel/arch/powerpc/platforms/maple/Kconfig2
-rw-r--r--kernel/arch/powerpc/platforms/pasemi/Kconfig2
-rw-r--r--kernel/arch/powerpc/platforms/pasemi/Makefile1
-rw-r--r--kernel/arch/powerpc/platforms/pasemi/iommu.c7
-rw-r--r--kernel/arch/powerpc/platforms/pasemi/msi.c171
-rw-r--r--kernel/arch/powerpc/platforms/powermac/Kconfig2
-rw-r--r--kernel/arch/powerpc/platforms/powermac/pic.c3
-rw-r--r--kernel/arch/powerpc/platforms/powernv/Kconfig7
-rw-r--r--kernel/arch/powerpc/platforms/powernv/Makefile6
-rw-r--r--kernel/arch/powerpc/platforms/powernv/eeh-powernv.c165
-rw-r--r--kernel/arch/powerpc/platforms/powernv/idle.c293
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-async.c3
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-dump.c56
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-elog.c40
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-hmi.c180
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-irqchip.c266
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-kmsg.c75
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c2
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-power.c147
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-prd.c448
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-sensor.c3
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-sysparam.c43
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal-wrappers.S7
-rw-r--r--kernel/arch/powerpc/platforms/powernv/opal.c272
-rw-r--r--kernel/arch/powerpc/platforms/powernv/pci-ioda.c951
-rw-r--r--kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c45
-rw-r--r--kernel/arch/powerpc/platforms/powernv/pci.c234
-rw-r--r--kernel/arch/powerpc/platforms/powernv/pci.h39
-rw-r--r--kernel/arch/powerpc/platforms/powernv/powernv.h17
-rw-r--r--kernel/arch/powerpc/platforms/powernv/rng.c2
-rw-r--r--kernel/arch/powerpc/platforms/powernv/setup.c218
-rw-r--r--kernel/arch/powerpc/platforms/powernv/smp.c29
-rw-r--r--kernel/arch/powerpc/platforms/powernv/subcore.c4
-rw-r--r--kernel/arch/powerpc/platforms/ps3/Kconfig2
-rw-r--r--kernel/arch/powerpc/platforms/ps3/interrupt.c3
-rw-r--r--kernel/arch/powerpc/platforms/ps3/os-area.c5
-rw-r--r--kernel/arch/powerpc/platforms/ps3/time.c3
-rw-r--r--kernel/arch/powerpc/platforms/pseries/Kconfig7
-rw-r--r--kernel/arch/powerpc/platforms/pseries/Makefile7
-rw-r--r--kernel/arch/powerpc/platforms/pseries/dlpar.c38
-rw-r--r--kernel/arch/powerpc/platforms/pseries/eeh_pseries.c62
-rw-r--r--kernel/arch/powerpc/platforms/pseries/hotplug-memory.c3
-rw-r--r--kernel/arch/powerpc/platforms/pseries/hvcserver.c2
-rw-r--r--kernel/arch/powerpc/platforms/pseries/iommu.c192
-rw-r--r--kernel/arch/powerpc/platforms/pseries/msi.c22
-rw-r--r--kernel/arch/powerpc/platforms/pseries/of_helpers.c38
-rw-r--r--kernel/arch/powerpc/platforms/pseries/of_helpers.h8
-rw-r--r--kernel/arch/powerpc/platforms/pseries/reconfig.c34
-rw-r--r--kernel/arch/powerpc/platforms/pseries/rng.c2
-rw-r--r--kernel/arch/powerpc/platforms/pseries/setup.c44
92 files changed, 3741 insertions, 1506 deletions
diff --git a/kernel/arch/powerpc/platforms/512x/Kconfig b/kernel/arch/powerpc/platforms/512x/Kconfig
index 5aa3f4b53..f09016f6b 100644
--- a/kernel/arch/powerpc/platforms/512x/Kconfig
+++ b/kernel/arch/powerpc/platforms/512x/Kconfig
@@ -7,8 +7,14 @@ config PPC_MPC512x
select PPC_PCI_CHOICE
select FSL_PCI if PCI
select ARCH_WANT_OPTIONAL_GPIOLIB
- select USB_EHCI_BIG_ENDIAN_MMIO
- select USB_EHCI_BIG_ENDIAN_DESC
+ select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD
+ select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD
+
+config MPC512x_LPBFIFO
+ tristate "MPC512x LocalPlus Bus FIFO driver"
+ depends on PPC_MPC512x && MPC512X_DMA
+ help
+ Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC).
config MPC5121_ADS
bool "Freescale MPC5121E ADS"
diff --git a/kernel/arch/powerpc/platforms/512x/Makefile b/kernel/arch/powerpc/platforms/512x/Makefile
index 01693121a..f47d42295 100644
--- a/kernel/arch/powerpc/platforms/512x/Makefile
+++ b/kernel/arch/powerpc/platforms/512x/Makefile
@@ -5,4 +5,5 @@ obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o
obj-y += mpc512x_shared.o
obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o
obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o
+obj-$(CONFIG_MPC512x_LPBFIFO) += mpc512x_lpbfifo.o
obj-$(CONFIG_PDM360NG) += pdm360ng.o
diff --git a/kernel/arch/powerpc/platforms/512x/clock-commonclk.c b/kernel/arch/powerpc/platforms/512x/clock-commonclk.c
index f691bcabd..c50ea76ba 100644
--- a/kernel/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/kernel/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -12,6 +12,7 @@
*/
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clkdev.h>
#include <linux/device.h>
diff --git a/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index ca3a062ed..0035d146d 100644
--- a/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -104,9 +104,10 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
return irq_linear_revmap(cpld_pic_host, cpld_irq);
}
-static void
-cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpld_pic_cascade(struct irq_desc *desc)
{
+ unsigned int irq;
+
irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
&cpld_regs->pci_mask);
if (irq != NO_IRQ) {
@@ -123,7 +124,8 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
}
static int
-cpld_pic_host_match(struct irq_domain *h, struct device_node *node)
+cpld_pic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
{
return cpld_pic_node == node;
}
diff --git a/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
new file mode 100644
index 000000000..8eb82b043
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -0,0 +1,540 @@
+/*
+ * The driver for Freescale MPC512x LocalPlus Bus FIFO
+ * (called SCLPC in the Reference Manual).
+ *
+ * Copyright (C) 2013-2015 Alexander Popov <alex.popov@linux.com>.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/mpc5121.h>
+#include <asm/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+
+#define DRV_NAME "mpc512x_lpbfifo"
+
+struct cs_range { /* one 4-cell item of the localbus "ranges" property, read in as raw u32s */
+ u32 csnum; /* first address cell: chip select number */
+ u32 base; /* must be zero */
+ u32 addr; /* CPU address of the beginning of the access window */
+ u32 size; /* size of the access window */
+};
+
+static struct lpbfifo_data { /* driver-wide state, zeroed and set up in probe */
+ spinlock_t lock; /* for protecting lpbfifo_data */
+ phys_addr_t regs_phys; /* physical start of the register block ('reg' resource) */
+ resource_size_t regs_size; /* size of the register block */
+ struct mpc512x_lpbfifo __iomem *regs; /* mapped registers; NULL makes submit return -ENODEV */
+ int irq; /* mapped LPB FIFO interrupt */
+ struct cs_range *cs_ranges; /* chip select windows read from the localbus node */
+ size_t cs_n; /* number of items in cs_ranges */
+ struct dma_chan *chan; /* DMA slave channel requested as "rx-tx" */
+ struct mpc512x_lpbfifo_request *req; /* request in progress, NULL when the FIFO is idle */
+ dma_addr_t ram_bus_addr; /* DMA mapping of the request buffer, kept for unmapping */
+ bool wait_lpbfifo_irq; /* LPB FIFO interrupt has not arrived yet */
+ bool wait_lpbfifo_callback; /* DMA completion callback has not run yet */
+} lpbfifo;
+
+/*
+ * A data transfer from RAM to some device on LPB is finished
+ * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback()
+ * have been called. We execute the callback registered in
+ * mpc512x_lpbfifo_request just after that.
+ * But for a data transfer from some device on LPB to RAM we don't enable
+ * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag
+ * automatically disables LPBFIFO reading request to the DMA controller
+ * and the data transfer hangs. So the callback registered in
+ * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback().
+ */
+
+/*
+ * mpc512x_lpbfifo_irq - interrupt handler of the LPB FIFO
+ *
+ * @param is the struct device that was handed to request_irq(); it is
+ * only used for error messages. The interrupt is meaningful solely for
+ * write requests. When the DMA callback has already run, the request is
+ * completed here and its callback is invoked with the lock dropped.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param)
+{
+	struct device *dev = param;
+	struct mpc512x_lpbfifo_request *cur;
+	struct mpc512x_lpbfifo_request *finished = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lpbfifo.lock, irqflags);
+
+	/* The device is gone (or not there yet): nothing to do */
+	if (!lpbfifo.regs)
+		goto unlock;
+
+	cur = lpbfifo.req;
+	if (!cur || cur->dir == MPC512X_LPBFIFO_REQ_DIR_READ) {
+		dev_err(dev, "bogus LPBFIFO IRQ\n");
+		goto unlock;
+	}
+
+	if (in_be32(&lpbfifo.regs->status) != MPC512X_SCLPC_SUCCESS) {
+		dev_err(dev, "DMA transfer from RAM to peripheral failed\n");
+		out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+		goto unlock;
+	}
+
+	/* Acknowledge the interrupt by writing the flag back */
+	out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS);
+
+	lpbfifo.wait_lpbfifo_irq = false;
+
+	if (!lpbfifo.wait_lpbfifo_callback) {
+		/* Both halves are done: mark the FIFO idle, complete below */
+		lpbfifo.req = NULL;
+		finished = cur;
+	}
+
+ unlock:
+	spin_unlock_irqrestore(&lpbfifo.lock, irqflags);
+
+	/* The client callback runs without the lock held */
+	if (finished && finished->callback)
+		finished->callback(finished);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * mpc512x_lpbfifo_callback - DMA completion hook
+ *
+ * Invoked by the DMA driver once the DMA transaction has finished.
+ * @param is not used. The buffer mapping is released here; the request is
+ * completed (and its callback invoked with the lock dropped) unless the
+ * LPB FIFO interrupt is still outstanding.
+ */
+static void mpc512x_lpbfifo_callback(void *param)
+{
+	struct mpc512x_lpbfifo_request *cur;
+	struct mpc512x_lpbfifo_request *finished = NULL;
+	enum dma_data_direction map_dir;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lpbfifo.lock, irqflags);
+
+	if (!lpbfifo.regs)
+		goto unlock;
+
+	cur = lpbfifo.req;
+	if (!cur) {
+		pr_err("bogus LPBFIFO callback\n");
+		goto unlock;
+	}
+
+	/* Drop the mapping that was set up when the transfer was kicked */
+	map_dir = (cur->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	dma_unmap_single(lpbfifo.chan->device->dev,
+			lpbfifo.ram_bus_addr, cur->size, map_dir);
+
+	lpbfifo.wait_lpbfifo_callback = false;
+
+	if (!lpbfifo.wait_lpbfifo_irq) {
+		/* Nothing else is pending: mark the FIFO idle, complete below */
+		lpbfifo.req = NULL;
+		finished = cur;
+	}
+
+ unlock:
+	spin_unlock_irqrestore(&lpbfifo.lock, irqflags);
+
+	/* The client callback runs without the lock held */
+	if (finished && finished->callback)
+		finished->callback(finished);
+}
+
+/*
+ * mpc512x_lpbfifo_kick - validate lpbfifo.req and start the transfer
+ *
+ * Checks size and alignment of the pending request, looks up the chip
+ * select whose window contains the access, configures the DMA channel,
+ * maps the RAM buffer, programs the FIFO registers and finally submits
+ * the DMA descriptor.
+ *
+ * Returns 0 on success, or:
+ *   -EINVAL  bad size/alignment, or the DMA slave configuration was refused;
+ *   -EFAULT  no chip select window covers the access, or mapping failed;
+ *   -ENOSPC  the DMA descriptor could not be prepared or submitted.
+ * The buffer mapping is released again on every failure that happens
+ * after it was created.
+ */
+static int mpc512x_lpbfifo_kick(void)
+{
+	u32 bits;
+	bool no_incr = false;
+	u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */
+	u32 cs = 0;
+	size_t i;
+	struct dma_device *dma_dev = NULL;
+	struct scatterlist sg;
+	enum dma_data_direction dir;
+	struct dma_slave_config dma_conf = {};
+	struct dma_async_tx_descriptor *dma_tx = NULL;
+	dma_cookie_t cookie;
+	int ret;
+
+	/*
+	 * 1. Fit the requirements:
+	 * - the packet size must be a multiple of 4 since FIFO Data Word
+	 *    Register allows only full-word access according to the Reference
+	 *    Manual;
+	 * - the physical address of the device on LPB and the packet size
+	 *    must be aligned on BPT (bytes per transaction) or 8-bytes
+	 *    boundary according to the Reference Manual;
+	 * - but we choose DMA maxburst equal (or very close to) BPT to prevent
+	 *    DMA controller from overtaking FIFO and causing FIFO underflow
+	 *    error. So we force the packet size to be aligned on BPT boundary
+	 *    not to confuse DMA driver which requires the packet size to be
+	 *    aligned on maxburst boundary;
+	 * - BPT should be set to the LPB device port size for operation with
+	 *    disabled auto-incrementing according to the Reference Manual.
+	 */
+	if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4))
+		return -EINVAL;
+
+	if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) {
+		bpt = lpbfifo.req->portsize;
+		no_incr = true;
+	}
+
+	/* Halve BPT until address and size fit; a fixed port size must fit as is */
+	while (bpt > 1) {
+		if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) &&
+					IS_ALIGNED(lpbfifo.req->size, bpt)) {
+			break;
+		}
+
+		if (no_incr)
+			return -EINVAL;
+
+		bpt >>= 1;
+	}
+	dma_conf.dst_maxburst = max(bpt, 0x4u) / 4;
+	dma_conf.src_maxburst = max(bpt, 0x4u) / 4;
+
+	/* Find the chip select whose window fully contains the access */
+	for (i = 0; i < lpbfifo.cs_n; i++) {
+		phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr;
+		phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size;
+		phys_addr_t access_start = lpbfifo.req->dev_phys_addr;
+		phys_addr_t access_end = access_start + lpbfifo.req->size;
+
+		if (access_start >= cs_start && access_end <= cs_end) {
+			cs = lpbfifo.cs_ranges[i].csnum;
+			break;
+		}
+	}
+	if (i == lpbfifo.cs_n)
+		return -EFAULT;
+
+	/* 2. Prepare DMA */
+	dma_dev = lpbfifo.chan->device;
+
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) {
+		dir = DMA_TO_DEVICE;
+		dma_conf.direction = DMA_MEM_TO_DEV;
+		dma_conf.dst_addr = lpbfifo.regs_phys +
+				offsetof(struct mpc512x_lpbfifo, data_word);
+	} else {
+		dir = DMA_FROM_DEVICE;
+		dma_conf.direction = DMA_DEV_TO_MEM;
+		dma_conf.src_addr = lpbfifo.regs_phys +
+				offsetof(struct mpc512x_lpbfifo, data_word);
+	}
+	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	/*
+	 * Make DMA channel work with LPB FIFO data register.
+	 * Nothing has been mapped yet and sg is still uninitialized, so
+	 * return directly instead of going through the unmap path.
+	 */
+	if (dma_dev->device_config(lpbfifo.chan, &dma_conf))
+		return -EINVAL;
+
+	sg_init_table(&sg, 1);
+
+	sg_dma_address(&sg) = dma_map_single(dma_dev->dev,
+			lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir);
+	if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg)))
+		return -EFAULT;
+
+	lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */
+
+	sg_dma_len(&sg) = lpbfifo.req->size;
+
+	dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg,
+						1, dma_conf.direction, 0);
+	if (!dma_tx) {
+		ret = -ENOSPC;
+		goto err_dma_prep;
+	}
+	dma_tx->callback = mpc512x_lpbfifo_callback;
+	dma_tx->callback_param = NULL;
+
+	/* 3. Prepare FIFO */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+	out_be32(&lpbfifo.regs->enable, 0x0);
+
+	/*
+	 * Configure the watermarks for write operation (RAM->DMA->FIFO->dev):
+	 * - high watermark 7 words according to the Reference Manual,
+	 * - low watermark 512 bytes (half of the FIFO).
+	 * These watermarks don't work for read operation since the
+	 * MPC512X_SCLPC_FLUSH bit is set (according to the Reference Manual).
+	 */
+	out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7));
+	out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200));
+
+	/*
+	 * Start address is a physical address of the region which belongs
+	 * to the device on the LocalPlus Bus
+	 */
+	out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr);
+
+	/*
+	 * Configure chip select, transfer direction, address increment option
+	 * and bytes per transaction option
+	 */
+	bits = MPC512X_SCLPC_CS(cs);
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ)
+		bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH;
+	if (no_incr)
+		bits |= MPC512X_SCLPC_DAI;
+	bits |= MPC512X_SCLPC_BPT(bpt);
+	out_be32(&lpbfifo.regs->ctrl, bits);
+
+	/*
+	 * Unmask irqs. For reads the normal interrupt stays masked, so the
+	 * request must not wait for it.
+	 */
+	bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE;
+	if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+		bits |= MPC512X_SCLPC_NORM_INT_ENABLE;
+	else
+		lpbfifo.wait_lpbfifo_irq = false;
+
+	out_be32(&lpbfifo.regs->enable, bits);
+
+	/* 4. Set packet size and kick FIFO off */
+	bits = lpbfifo.req->size | MPC512X_SCLPC_START;
+	out_be32(&lpbfifo.regs->pkt_size, bits);
+
+	/* 5. Finally kick DMA off */
+	cookie = dma_tx->tx_submit(dma_tx);
+	if (dma_submit_error(cookie)) {
+		ret = -ENOSPC;
+		goto err_dma_submit;
+	}
+
+	return 0;
+
+ err_dma_submit:
+	/* The FIFO was already started: put it back into reset */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ err_dma_prep:
+	/* Only reached after dma_map_single() succeeded */
+	dma_unmap_single(dma_dev->dev, sg_dma_address(&sg),
+						lpbfifo.req->size, dir);
+	return ret;
+}
+
+/*
+ * Install @req as the current request and start it.
+ *
+ * Returns -ENODEV when no registers are mapped, -EBUSY when another
+ * request is still in progress, otherwise the result of
+ * mpc512x_lpbfifo_kick(). On a failed kick the FIFO is marked idle again.
+ */
+static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req)
+{
+	int err;
+
+	if (!lpbfifo.regs)
+		return -ENODEV;
+
+	/* Only one transfer at a time */
+	if (lpbfifo.req)
+		return -EBUSY;
+
+	lpbfifo.req = req;
+	lpbfifo.wait_lpbfifo_callback = true;
+	lpbfifo.wait_lpbfifo_irq = true;
+
+	err = mpc512x_lpbfifo_kick();
+	if (err)
+		lpbfifo.req = NULL; /* Set the FIFO as idle */
+
+	return err;
+}
+
+/*
+ * mpc512x_lpbfifo_submit - queue a transfer request on the LPB FIFO
+ *
+ * Takes the driver lock with interrupts disabled and hands @req to
+ * mpc512x_lpbfifo_submit_locked(); returns whatever that returns.
+ */
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req)
+{
+	unsigned long irqflags;
+	int err;
+
+	spin_lock_irqsave(&lpbfifo.lock, irqflags);
+	err = mpc512x_lpbfifo_submit_locked(req);
+	spin_unlock_irqrestore(&lpbfifo.lock, irqflags);
+
+	return err;
+}
+EXPORT_SYMBOL(mpc512x_lpbfifo_submit);
+
+/*
+ * get_cs_ranges - read the chip select windows from the device tree
+ *
+ * The "ranges" property of the "fsl,mpc5121-localbus" node is what lets the
+ * driver determine the chip select number of a client device ordering a
+ * DMA transfer. The result is stored in lpbfifo.cs_ranges / lpbfifo.cs_n;
+ * the array is allocated with devm_kcalloc() on @dev.
+ *
+ * Returns 0 on success and -ENODEV on any failure.
+ */
+static int get_cs_ranges(struct device *dev)
+{
+	struct device_node *np;
+	const u32 *n_addr;
+	const u32 *n_size;
+	int n_cells;
+	int rc = -ENODEV;
+	size_t idx;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus");
+	if (!np)
+		return rc;
+
+	/*
+	 * The localbus node is expected to use two address cells and one
+	 * size cell, so that each "ranges" item is made of four cells:
+	 *  - the chipselect number (first address cell);
+	 *  - the offset in the chipselect (second address cell), which
+	 *    must be zero;
+	 *  - the CPU address where the access window begins;
+	 *  - the size of the access window (the only size cell).
+	 */
+	n_addr = of_get_property(np, "#address-cells", NULL);
+	n_size = of_get_property(np, "#size-cells", NULL);
+	if (!n_addr || !n_size || *n_addr != 2 || *n_size != 1)
+		goto out;
+
+	n_cells = of_property_count_u32_elems(np, "ranges");
+	if (n_cells <= 0 || n_cells % 4 != 0)
+		goto out;
+
+	lpbfifo.cs_n = n_cells / 4;
+	lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n,
+					sizeof(struct cs_range), GFP_KERNEL);
+	if (!lpbfifo.cs_ranges)
+		goto out;
+
+	/* struct cs_range mirrors one item, so read the cells in directly */
+	if (of_property_read_u32_array(np, "ranges",
+				(u32 *)lpbfifo.cs_ranges, n_cells) != 0)
+		goto out;
+
+	/* Reject any window with a non-zero offset in the chipselect */
+	for (idx = 0; idx < lpbfifo.cs_n; idx++) {
+		if (lpbfifo.cs_ranges[idx].base != 0)
+			goto out;
+	}
+
+	rc = 0;
+
+ out:
+	of_node_put(np);
+	return rc;
+}
+
+/*
+ * mpc512x_lpbfifo_probe - bind to the LPB FIFO device tree node
+ *
+ * Resets the driver state, requests the "rx-tx" DMA channel, maps the
+ * register block, puts the FIFO into reset, reads the chip select windows
+ * and installs the interrupt handler.
+ *
+ * Returns 0 on success, -EPROBE_DEFER when the DMA channel is not
+ * available, -EBUSY when the register region cannot be requested,
+ * -ENOMEM when it cannot be mapped and -ENODEV for every other failure.
+ * On failure lpbfifo.regs is cleared again so that
+ * mpc512x_lpbfifo_submit() keeps answering -ENODEV instead of touching
+ * registers that devres unmaps once probe has failed.
+ */
+static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
+{
+	struct resource r;
+	unsigned long flags;
+	int ret = 0;
+
+	memset(&lpbfifo, 0, sizeof(struct lpbfifo_data));
+	spin_lock_init(&lpbfifo.lock);
+
+	lpbfifo.chan = dma_request_slave_channel(&pdev->dev, "rx-tx");
+	if (lpbfifo.chan == NULL)
+		return -EPROBE_DEFER;
+
+	if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) {
+		dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.regs_phys = r.start;
+	lpbfifo.regs_size = resource_size(&r);
+
+	if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys,
+					lpbfifo.regs_size, DRV_NAME)) {
+		dev_err(&pdev->dev, "unable to request region\n");
+		ret = -EBUSY;
+		goto err0;
+	}
+
+	lpbfifo.regs = devm_ioremap(&pdev->dev,
+					lpbfifo.regs_phys, lpbfifo.regs_size);
+	if (!lpbfifo.regs) {
+		dev_err(&pdev->dev, "mapping registers failed\n");
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* Start from a known state: hold the controller and FIFO in reset */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+	if (get_cs_ranges(&pdev->dev) != 0) {
+		dev_err(&pdev->dev, "bad '/localbus' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (lpbfifo.irq == NO_IRQ) {
+		dev_err(&pdev->dev, "mapping irq failed\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0,
+						DRV_NAME, &pdev->dev) != 0) {
+		dev_err(&pdev->dev, "requesting irq failed\n");
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	dev_info(&pdev->dev, "probe succeeded\n");
+	return 0;
+
+ err1:
+	irq_dispose_mapping(lpbfifo.irq);
+ err0:
+	/*
+	 * The register mapping is device-managed and goes away when probe
+	 * fails; do not leave a stale pointer that makes the device look
+	 * present to mpc512x_lpbfifo_submit().
+	 */
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	lpbfifo.regs = NULL;
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	dma_release_channel(lpbfifo.chan);
+	return ret;
+}
+
+/*
+ * mpc512x_lpbfifo_remove - unbind from the device
+ *
+ * First hides the registers from the rest of the driver (submit, the IRQ
+ * handler and the DMA callback all bail out on lpbfifo.regs == NULL),
+ * then stops DMA, puts the FIFO into reset and releases the interrupt and
+ * the DMA channel. Always returns 0.
+ */
+static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
+{
+	struct dma_device *dmadev = lpbfifo.chan->device;
+	struct mpc512x_lpbfifo __iomem *fifo_regs;
+	unsigned long irqflags;
+
+	/* Keep a private copy of the mapping for the final reset below */
+	spin_lock_irqsave(&lpbfifo.lock, irqflags);
+	fifo_regs = lpbfifo.regs;
+	lpbfifo.regs = NULL;
+	spin_unlock_irqrestore(&lpbfifo.lock, irqflags);
+
+	dmadev->device_terminate_all(lpbfifo.chan);
+	out_be32(&fifo_regs->enable,
+			MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+	free_irq(lpbfifo.irq, &pdev->dev);
+	irq_dispose_mapping(lpbfifo.irq);
+	dma_release_channel(lpbfifo.chan);
+
+	return 0;
+}
+
+static const struct of_device_id mpc512x_lpbfifo_match[] = { /* device tree match table used by the platform driver */
+ { .compatible = "fsl,mpc512x-lpbfifo", },
+ {}, /* terminating empty entry */
+};
+MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
+
+static struct platform_driver mpc512x_lpbfifo_driver = { /* ties probe/remove to the OF match table */
+ .probe = mpc512x_lpbfifo_probe,
+ .remove = mpc512x_lpbfifo_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .owner = THIS_MODULE,
+ .of_match_table = mpc512x_lpbfifo_match,
+ },
+};
+
+module_platform_driver(mpc512x_lpbfifo_driver); /* registers the driver via the module_platform_driver() helper */
+
+MODULE_AUTHOR("Alexander Popov <alex.popov@linux.com>");
+MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/kernel/arch/powerpc/platforms/52xx/media5200.c b/kernel/arch/powerpc/platforms/52xx/media5200.c
index 32cae33c4..8fb95480f 100644
--- a/kernel/arch/powerpc/platforms/52xx/media5200.c
+++ b/kernel/arch/powerpc/platforms/52xx/media5200.c
@@ -80,7 +80,7 @@ static struct irq_chip media5200_irq_chip = {
.irq_mask_ack = media5200_irq_mask,
};
-void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void media5200_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
int sub_virq, val;
diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index c949ca055..3048e34db 100644
--- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -191,9 +191,9 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
.irq_set_type = mpc52xx_gpt_irq_set_type,
};
-void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
{
- struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq);
+ struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
int sub_virq;
u32 status;
@@ -724,7 +724,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
{
struct mpc52xx_gpt_priv *gpt;
- gpt = kzalloc(sizeof *gpt, GFP_KERNEL);
+ gpt = devm_kzalloc(&ofdev->dev, sizeof *gpt, GFP_KERNEL);
if (!gpt)
return -ENOMEM;
@@ -732,10 +732,8 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
gpt->dev = &ofdev->dev;
gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
gpt->regs = of_iomap(ofdev->dev.of_node, 0);
- if (!gpt->regs) {
- kfree(gpt);
+ if (!gpt->regs)
return -ENOMEM;
- }
dev_set_drvdata(&ofdev->dev, gpt);
diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 251dcb90e..7bb42a010 100644
--- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -568,6 +568,7 @@ static const struct of_device_id mpc52xx_lpbfifo_match[] = {
{ .compatible = "fsl,mpc5200-lpbfifo", },
{},
};
+MODULE_DEVICE_TABLE(of, mpc52xx_lpbfifo_match);
static struct platform_driver mpc52xx_lpbfifo_driver = {
.driver = {
diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index e2d401ad8..6eb3b2aba 100644
--- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -12,7 +12,7 @@
#undef DEBUG
-#include <asm/pci.h>
+#include <linux/pci.h>
#include <asm/mpc52xx.h>
#include <asm/delay.h>
#include <asm/machdep.h>
diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 2944bc84b..4fe2074c8 100644
--- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -196,7 +196,7 @@ static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
ctrl_reg |= (type << (22 - (l2irq * 2)));
out_be32(&intr->ctrl, ctrl_reg);
- __irq_set_handler_locked(d->irq, handler);
+ irq_set_handler_locked(d, handler);
return 0;
}
diff --git a/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 74861a7fb..60e89fc9c 100644
--- a/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -78,7 +78,7 @@ static struct irq_chip pq2ads_pci_ic = {
.irq_disable = pq2ads_pci_mask_irq
};
-static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc)
+static void pq2ads_pci_irq_demux(struct irq_desc *desc)
{
struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc);
u32 stat, mask, pend;
diff --git a/kernel/arch/powerpc/platforms/83xx/suspend.c b/kernel/arch/powerpc/platforms/83xx/suspend.c
index c9adbfb65..fcbea4b51 100644
--- a/kernel/arch/powerpc/platforms/83xx/suspend.c
+++ b/kernel/arch/powerpc/platforms/83xx/suspend.c
@@ -445,5 +445,4 @@ static int pmc_init(void)
{
return platform_driver_register(&pmc_driver);
}
-
-module_init(pmc_init);
+device_initcall(pmc_init);
diff --git a/kernel/arch/powerpc/platforms/85xx/Kconfig b/kernel/arch/powerpc/platforms/85xx/Kconfig
index 2fb4b2436..97915feff 100644
--- a/kernel/arch/powerpc/platforms/85xx/Kconfig
+++ b/kernel/arch/powerpc/platforms/85xx/Kconfig
@@ -282,7 +282,7 @@ config CORENET_GENERIC
For 64bit kernel, the following boards are supported:
T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
The following boards are supported for both 32bit and 64bit kernel:
- P5020 DS, P5040 DS and T104xQDS/RDB
+ P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB
endif # FSL_SOC_BOOKE
diff --git a/kernel/arch/powerpc/platforms/85xx/c293pcie.c b/kernel/arch/powerpc/platforms/85xx/c293pcie.c
index 84476b646..61bc851e9 100644
--- a/kernel/arch/powerpc/platforms/85xx/c293pcie.c
+++ b/kernel/arch/powerpc/platforms/85xx/c293pcie.c
@@ -66,10 +66,6 @@ define_machine(c293_pcie) {
.probe = c293_pcie_probe,
.setup_arch = c293_pcie_setup_arch,
.init_IRQ = c293_pcie_pic_init,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
.get_irq = mpic_get_irq,
.restart = fsl_rstcr_restart,
.calibrate_decr = generic_calibrate_decr,
diff --git a/kernel/arch/powerpc/platforms/85xx/common.c b/kernel/arch/powerpc/platforms/85xx/common.c
index 7bfb9b184..23791de7b 100644
--- a/kernel/arch/powerpc/platforms/85xx/common.c
+++ b/kernel/arch/powerpc/platforms/85xx/common.c
@@ -49,7 +49,7 @@ int __init mpc85xx_common_publish_devices(void)
return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
}
#ifdef CONFIG_CPM2
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpm2_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
int cascade_irq;
diff --git a/kernel/arch/powerpc/platforms/85xx/corenet_generic.c b/kernel/arch/powerpc/platforms/85xx/corenet_generic.c
index 9824d2cf7..46d05c94a 100644
--- a/kernel/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/kernel/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -150,12 +150,18 @@ static const char * const boards[] __initconst = {
"fsl,B4860QDS",
"fsl,B4420QDS",
"fsl,B4220QDS",
+ "fsl,T1023RDB",
+ "fsl,T1024QDS",
+ "fsl,T1024RDB",
+ "fsl,T1040D4RDB",
+ "fsl,T1042D4RDB",
"fsl,T1040QDS",
"fsl,T1042QDS",
"fsl,T1040RDB",
"fsl,T1042RDB",
"fsl,T1042RDB_PI",
"keymile,kmcoge4",
+ "varisys,CYRUS",
NULL
};
@@ -209,7 +215,17 @@ define_machine(corenet_generic) {
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
+/*
+ * Core reset may cause issues if using the proxy mode of MPIC.
+ * So, use the mixed mode of MPIC if enabling CPU hotplug.
+ *
+ * Likewise, problems have been seen with kexec when coreint is enabled.
+ */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+ .get_irq = mpic_get_irq,
+#else
.get_irq = mpic_get_coreint_irq,
+#endif
.restart = fsl_rstcr_restart,
.calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index b0753e222..5ac70de3e 100644
--- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -192,8 +192,7 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus)
}
#ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade_handler(unsigned int irq,
- struct irq_desc *desc)
+static void mpc85xx_8259_cascade_handler(struct irq_desc *desc)
{
unsigned int cascade_irq = i8259_irq();
@@ -202,7 +201,7 @@ static void mpc85xx_8259_cascade_handler(unsigned int irq,
generic_handle_irq(cascade_irq);
/* check for any interrupts from the shared IRQ line */
- handle_fasteoi_irq(irq, desc);
+ handle_fasteoi_irq(desc);
}
static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id)
diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index ffdf02121..f858306db 100644
--- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -46,7 +46,7 @@
#endif
#ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc85xx_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index a392e94a0..f0be439ce 100644
--- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -34,6 +34,7 @@
#include <linux/of_device.h>
#include <linux/phy.h>
#include <linux/memblock.h>
+#include <linux/fsl/guts.h>
#include <linux/atomic.h>
#include <asm/time.h>
@@ -51,7 +52,6 @@
#include <asm/qe_ic.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
-#include <asm/fsl_guts.h>
#include "smp.h"
#include "mpc85xx.h"
diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index e358bed66..50dcc00a0 100644
--- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -17,6 +17,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/of_platform.h>
+#include <linux/fsl/guts.h>
#include <asm/time.h>
#include <asm/machdep.h>
@@ -27,7 +28,6 @@
#include <asm/mpic.h>
#include <asm/qe.h>
#include <asm/qe_ic.h>
-#include <asm/fsl_guts.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
diff --git a/kernel/arch/powerpc/platforms/85xx/p1022_ds.c b/kernel/arch/powerpc/platforms/85xx/p1022_ds.c
index 6ac986d3f..371df822e 100644
--- a/kernel/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/kernel/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -16,6 +16,7 @@
* kind, whether express or implied.
*/
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
#include <linux/of_platform.h>
#include <asm/div64.h>
@@ -25,7 +26,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <asm/udbg.h>
-#include <asm/fsl_guts.h>
#include <asm/fsl_lbc.h>
#include "smp.h"
diff --git a/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c b/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c
index 680232d6b..5087becaa 100644
--- a/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -12,6 +12,7 @@
* kind, whether express or implied.
*/
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
#include <linux/of_platform.h>
#include <asm/div64.h>
@@ -21,7 +22,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <asm/udbg.h>
-#include <asm/fsl_guts.h>
#include "smp.h"
#include "mpc85xx.h"
diff --git a/kernel/arch/powerpc/platforms/85xx/smp.c b/kernel/arch/powerpc/platforms/85xx/smp.c
index 8631ac5f0..6b107cea1 100644
--- a/kernel/arch/powerpc/platforms/85xx/smp.c
+++ b/kernel/arch/powerpc/platforms/85xx/smp.c
@@ -19,6 +19,7 @@
#include <linux/kexec.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
+#include <linux/fsl/guts.h>
#include <asm/machdep.h>
#include <asm/pgtable.h>
@@ -26,7 +27,6 @@
#include <asm/mpic.h>
#include <asm/cacheflush.h>
#include <asm/dbell.h>
-#include <asm/fsl_guts.h>
#include <asm/code-patching.h>
#include <asm/cputhreads.h>
@@ -173,15 +173,22 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
static void wake_hw_thread(void *info)
{
void fsl_secondary_thread_init(void);
- unsigned long imsr1, inia1;
+ unsigned long imsr, inia;
int nr = *(const int *)info;
- imsr1 = MSR_KERNEL;
- inia1 = *(unsigned long *)fsl_secondary_thread_init;
+ imsr = MSR_KERNEL;
+ inia = *(unsigned long *)fsl_secondary_thread_init;
- mttmr(TMRN_IMSR1, imsr1);
- mttmr(TMRN_INIA1, inia1);
- mtspr(SPRN_TENS, TEN_THREAD(1));
+ if (cpu_thread_in_core(nr) == 0) {
+ /* For when we boot on a secondary thread with kdump */
+ mttmr(TMRN_IMSR0, imsr);
+ mttmr(TMRN_INIA0, inia);
+ mtspr(SPRN_TENS, TEN_THREAD(0));
+ } else {
+ mttmr(TMRN_IMSR1, imsr);
+ mttmr(TMRN_INIA1, inia);
+ mtspr(SPRN_TENS, TEN_THREAD(1));
+ }
smp_generic_kick_cpu(nr);
}
@@ -224,6 +231,12 @@ static int smp_85xx_kick_cpu(int nr)
smp_call_function_single(primary, wake_hw_thread, &nr, 0);
return 0;
+ } else if (cpu_thread_in_core(boot_cpuid) != 0 &&
+ cpu_first_thread_sibling(boot_cpuid) == nr) {
+ if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
+ return -ENOENT;
+
+ smp_call_function_single(boot_cpuid, wake_hw_thread, &nr, 0);
}
#endif
@@ -331,13 +344,14 @@ struct smp_ops_t smp_85xx_ops = {
.cpu_disable = generic_cpu_disable,
.cpu_die = generic_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64)
.give_timebase = smp_generic_give_timebase,
.take_timebase = smp_generic_take_timebase,
#endif
};
#ifdef CONFIG_KEXEC
+#ifdef CONFIG_PPC32
atomic_t kexec_down_cpus = ATOMIC_INIT(0);
void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
@@ -345,6 +359,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
local_irq_disable();
if (secondary) {
+ __flush_disable_L1();
atomic_inc(&kexec_down_cpus);
/* loop forever */
while (1);
@@ -356,62 +371,67 @@ static void mpc85xx_smp_kexec_down(void *arg)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0,1);
}
-
-static void map_and_flush(unsigned long paddr)
+#else
+void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
{
- struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
- unsigned long kaddr = (unsigned long)kmap_atomic(page);
+ int cpu = smp_processor_id();
+ int sibling = cpu_last_thread_sibling(cpu);
+ bool notified = false;
+ int disable_cpu;
+ int disable_threadbit = 0;
+ long start = mftb();
+ long now;
- flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
- kunmap_atomic((void *)kaddr);
-}
+ local_irq_disable();
+ hard_irq_disable();
+ mpic_teardown_this_cpu(secondary);
-/**
- * Before we reset the other cores, we need to flush relevant cache
- * out to memory so we don't get anything corrupted, some of these flushes
- * are performed out of an overabundance of caution as interrupts are not
- * disabled yet and we can switch cores
- */
-static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image)
-{
- kimage_entry_t *ptr, entry;
- unsigned long paddr;
- int i;
-
- if (image->type == KEXEC_TYPE_DEFAULT) {
- /* normal kexec images are stored in temporary pages */
- for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
- ptr = (entry & IND_INDIRECTION) ?
- phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
- if (!(entry & IND_DESTINATION)) {
- map_and_flush(entry);
+ if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
+ /*
+ * We enter the crash kernel on whatever cpu crashed,
+ * even if it's a secondary thread. If that's the case,
+ * disable the corresponding primary thread.
+ */
+ disable_threadbit = 1;
+ disable_cpu = cpu_first_thread_sibling(cpu);
+ } else if (sibling != crashing_cpu &&
+ cpu_thread_in_core(cpu) == 0 &&
+ cpu_thread_in_core(sibling) != 0) {
+ disable_threadbit = 2;
+ disable_cpu = sibling;
+ }
+
+ if (disable_threadbit) {
+ while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ now = mftb();
+ if (!notified && now - start > 1000000) {
+ pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
+ __func__, smp_processor_id(),
+ disable_cpu,
+ paca[disable_cpu].kexec_state);
+ notified = true;
}
}
- /* flush out last IND_DONE page */
- map_and_flush(entry);
- } else {
- /* crash type kexec images are copied to the crash region */
- for (i = 0; i < image->nr_segments; i++) {
- struct kexec_segment *seg = &image->segment[i];
- for (paddr = seg->mem; paddr < seg->mem + seg->memsz;
- paddr += PAGE_SIZE) {
- map_and_flush(paddr);
- }
+
+ if (notified) {
+ pr_info("%s: cpu %d done waiting\n",
+ __func__, disable_cpu);
}
- }
- /* also flush the kimage struct to be passed in as well */
- flush_dcache_range((unsigned long)image,
- (unsigned long)image + sizeof(*image));
+ mtspr(SPRN_TENC, disable_threadbit);
+ while (mfspr(SPRN_TENSR) & disable_threadbit)
+ cpu_relax();
+ }
}
+#endif
static void mpc85xx_smp_machine_kexec(struct kimage *image)
{
+#ifdef CONFIG_PPC32
int timeout = INT_MAX;
int i, num_cpus = num_present_cpus();
- mpc85xx_smp_flush_dcache_kexec(image);
-
if (image->type == KEXEC_TYPE_DEFAULT)
smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
@@ -429,6 +449,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
if ( i == smp_processor_id() ) continue;
mpic_reset_core(i);
}
+#endif
default_machine_kexec(image);
}
diff --git a/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 55a9682b9..b02d6a5bb 100644
--- a/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -91,9 +91,10 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
(irq_hw_number_t)i);
}
-void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
+static void socrates_fpga_pic_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned int irq = irq_desc_get_irq(desc);
unsigned int cascade_irq;
/*
diff --git a/kernel/arch/powerpc/platforms/85xx/twr_p102x.c b/kernel/arch/powerpc/platforms/85xx/twr_p102x.c
index 1eadb6d0d..892e61351 100644
--- a/kernel/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/kernel/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
#include <linux/of_platform.h>
@@ -23,7 +24,6 @@
#include <asm/mpic.h>
#include <asm/qe.h>
#include <asm/qe_ic.h>
-#include <asm/fsl_guts.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -79,7 +79,7 @@ static void __init twr_p1025_setup_arch(void)
mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
if (machine_is(twr_p1025)) {
struct ccsr_guts __iomem *guts;
@@ -101,7 +101,7 @@ static void __init twr_p1025_setup_arch(void)
MPC85xx_PMUXCR_QE(12));
iounmap(guts);
-#if defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_SERIAL_QE)
/* On P1025TWR board, the UCC7 acted as UART port.
* However, The UCC7's CTS pin is low level in default,
* it will impact the transmission in full duplex
diff --git a/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 55413a547..437a9c372 100644
--- a/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -24,6 +24,7 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <linux/fsl/guts.h>
#include <asm/time.h>
#include <asm/machdep.h>
@@ -38,7 +39,6 @@
#include <sysdev/fsl_pci.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/simple_gpio.h>
-#include <asm/fsl_guts.h>
#include "mpc86xx.h"
diff --git a/kernel/arch/powerpc/platforms/86xx/pic.c b/kernel/arch/powerpc/platforms/86xx/pic.c
index d5b98c0f9..845defa1f 100644
--- a/kernel/arch/powerpc/platforms/86xx/pic.c
+++ b/kernel/arch/powerpc/platforms/86xx/pic.c
@@ -17,7 +17,7 @@
#include <asm/i8259.h>
#ifdef CONFIG_PPC_I8259
-static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc86xx_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
diff --git a/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c b/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c
index d30377470..c289fc77b 100644
--- a/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -214,7 +214,7 @@ void mpc8xx_restart(char *cmd)
panic("Restart failed\n");
}
-static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpm_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
int cascade_irq = cpm_get_irq();
diff --git a/kernel/arch/powerpc/platforms/Kconfig.cputype b/kernel/arch/powerpc/platforms/Kconfig.cputype
index 7264e9119..142dff5e9 100644
--- a/kernel/arch/powerpc/platforms/Kconfig.cputype
+++ b/kernel/arch/powerpc/platforms/Kconfig.cputype
@@ -147,17 +147,6 @@ config 6xx
depends on PPC32 && PPC_BOOK3S
select PPC_HAVE_PMU_SUPPORT
-config TUNE_CELL
- bool "Optimize for Cell Broadband Engine"
- depends on PPC64 && PPC_BOOK3S
- help
- Cause the compiler to optimize for the PPE of the Cell Broadband
- Engine. This will make the code run considerably faster on Cell
- but somewhat slower on other machines. This option only changes
- the scheduling of instructions, not the selection of instructions
- itself, so the resulting kernel will keep running on all other
- machines.
-
# this is temp to handle compat with arch=ppc
config 8xx
bool
@@ -405,6 +394,16 @@ config PPC_DOORBELL
endmenu
+config VDSO32
+ def_bool y
+ depends on PPC32 || CPU_BIG_ENDIAN
+ help
+ This symbol controls whether we build the 32-bit VDSO. We obviously
+ want to do that if we're building a 32-bit kernel. If we're building
+ a 64-bit kernel then we only want a 32-bit VDSO if we're building for
+ big endian. That is because the only little endian configuration we
+ support is ppc64le which is 64-bit only.
+
choice
prompt "Endianness selection"
default CPU_BIG_ENDIAN
@@ -421,6 +420,7 @@ config CPU_BIG_ENDIAN
config CPU_LITTLE_ENDIAN
bool "Build little endian kernel"
+ depends on PPC_BOOK3S_64
select PPC64_BOOT_WRAPPER
help
Build a little endian kernel.
diff --git a/kernel/arch/powerpc/platforms/cell/Kconfig b/kernel/arch/powerpc/platforms/cell/Kconfig
index 2f23133ab..429fc59d2 100644
--- a/kernel/arch/powerpc/platforms/cell/Kconfig
+++ b/kernel/arch/powerpc/platforms/cell/Kconfig
@@ -25,7 +25,7 @@ config PPC_CELL_NATIVE
config PPC_IBM_CELL_BLADE
bool "IBM Cell Blade"
- depends on PPC64 && PPC_BOOK3S
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
select PPC_CELL_NATIVE
select PPC_OF_PLATFORM_PCI
select PCI
@@ -35,7 +35,7 @@ config PPC_IBM_CELL_BLADE
config PPC_CELL_QPACE
bool "IBM Cell - QPACE"
- depends on PPC64 && PPC_BOOK3S
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
select PPC_CELL_COMMON
config AXON_MSI
@@ -57,21 +57,6 @@ config SPU_FS
Units on machines implementing the Broadband Processor
Architecture.
-config SPU_FS_64K_LS
- bool "Use 64K pages to map SPE local store"
- # we depend on PPC_MM_SLICES for now rather than selecting
- # it because we depend on hugetlbfs hooks being present. We
- # will fix that when the generic code has been improved to
- # not require hijacking hugetlbfs hooks.
- depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
- default y
- select PPC_HAS_HASH_64K
- help
- This option causes SPE local stores to be mapped in process
- address spaces using 64K pages while the rest of the kernel
- uses 4K pages. This can improve performances of applications
- using multiple SPEs by lowering the TLB pressure on them.
-
config SPU_BASE
bool
default n
diff --git a/kernel/arch/powerpc/platforms/cell/axon_msi.c b/kernel/arch/powerpc/platforms/cell/axon_msi.c
index 623bd9614..aed771449 100644
--- a/kernel/arch/powerpc/platforms/cell/axon_msi.c
+++ b/kernel/arch/powerpc/platforms/cell/axon_msi.c
@@ -22,6 +22,7 @@
#include <asm/machdep.h>
#include <asm/prom.h>
+#include "cell.h"
/*
* MSIC registers, specified as offsets from dcr_base
@@ -92,10 +93,10 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
dcr_write(msic->dcr_host, dcr_n, val);
}
-static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+static void axon_msi_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct axon_msic *msic = irq_get_handler_data(irq);
+ struct axon_msic *msic = irq_desc_get_handler_data(desc);
u32 write_offset, msi;
int idx;
int retry = 0;
@@ -212,7 +213,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
return -ENODEV;
}
- entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
+ entry = first_pci_msi_entry(dev);
for (; dn; dn = of_get_next_parent(dn)) {
if (entry->msi_attrib.is_64) {
@@ -268,7 +269,7 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (rc)
return rc;
- list_for_each_entry(entry, &dev->msi_list, list) {
+ for_each_pci_msi_entry(entry, dev) {
virq = irq_create_direct_mapping(msic->irq_domain);
if (virq == NO_IRQ) {
dev_warn(&dev->dev,
@@ -291,7 +292,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
- list_for_each_entry(entry, &dev->msi_list, list) {
+ for_each_pci_msi_entry(entry, dev) {
if (entry->irq == NO_IRQ)
continue;
@@ -326,7 +327,7 @@ static void axon_msi_shutdown(struct platform_device *device)
u32 tmp;
pr_devel("axon_msi: disabling %s\n",
- msic->irq_domain->of_node->full_name);
+ irq_domain_get_of_node(msic->irq_domain)->full_name);
tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
@@ -406,8 +407,8 @@ static int axon_msi_probe(struct platform_device *device)
dev_set_drvdata(&device->dev, msic);
- ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
- ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+ cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs;
+ cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
axon_msi_debug_setup(dn, msic);
diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.c b/kernel/arch/powerpc/platforms/cell/interrupt.c
index 3af8324c1..9f609fc8d 100644
--- a/kernel/arch/powerpc/platforms/cell/interrupt.c
+++ b/kernel/arch/powerpc/platforms/cell/interrupt.c
@@ -99,11 +99,12 @@ static void iic_ioexc_eoi(struct irq_data *d)
{
}
-static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc)
+static void iic_ioexc_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct cbe_iic_regs __iomem *node_iic =
(void __iomem *)irq_desc_get_handler_data(desc);
+ unsigned int irq = irq_desc_get_irq(desc);
unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
unsigned long bits, ack;
int cascade;
@@ -222,7 +223,8 @@ void iic_request_IPIs(void)
#endif /* CONFIG_SMP */
-static int iic_host_match(struct irq_domain *h, struct device_node *node)
+static int iic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
{
return of_device_is_compatible(node,
"IBM,CBEA-Internal-Interrupt-Controller");
diff --git a/kernel/arch/powerpc/platforms/cell/iommu.c b/kernel/arch/powerpc/platforms/cell/iommu.c
index 21b502398..14a582b21 100644
--- a/kernel/arch/powerpc/platforms/cell/iommu.c
+++ b/kernel/arch/powerpc/platforms/cell/iommu.c
@@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np)
return *ioid;
}
+static struct iommu_table_ops cell_iommu_ops = {
+ .set = tce_build_cell,
+ .clear = tce_free_cell
+};
+
static struct iommu_window * __init
cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
unsigned long offset, unsigned long size,
@@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
window->table.it_offset =
(offset >> window->table.it_page_shift) + pte_offset;
window->table.it_size = size >> window->table.it_page_shift;
+ window->table.it_ops = &cell_iommu_ops;
iommu_init_table(&window->table, iommu->nid);
@@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void)
/* Setup various callbacks */
cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
- ppc_md.tce_build = tce_build_cell;
- ppc_md.tce_free = tce_free_cell;
if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
goto bail;
diff --git a/kernel/arch/powerpc/platforms/cell/ras.c b/kernel/arch/powerpc/platforms/cell/ras.c
index e865d7481..2d4f60c01 100644
--- a/kernel/arch/powerpc/platforms/cell/ras.c
+++ b/kernel/arch/powerpc/platforms/cell/ras.c
@@ -123,7 +123,7 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
area->nid = nid;
area->order = order;
- area->pages = alloc_pages_exact_node(area->nid,
+ area->pages = __alloc_pages_node(area->nid,
GFP_KERNEL|__GFP_THISNODE,
area->order);
diff --git a/kernel/arch/powerpc/platforms/cell/spider-pic.c b/kernel/arch/powerpc/platforms/cell/spider-pic.c
index 1f72f4ab6..54ee5743c 100644
--- a/kernel/arch/powerpc/platforms/cell/spider-pic.c
+++ b/kernel/arch/powerpc/platforms/cell/spider-pic.c
@@ -199,7 +199,7 @@ static const struct irq_domain_ops spider_host_ops = {
.xlate = spider_host_xlate,
};
-static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+static void spider_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct spider_pic *pic = irq_desc_get_handler_data(desc);
@@ -231,20 +231,23 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
const u32 *imap, *tmp;
int imaplen, intsize, unit;
struct device_node *iic;
+ struct device_node *of_node;
+
+ of_node = irq_domain_get_of_node(pic->host);
/* First, we check whether we have a real "interrupts" in the device
* tree in case the device-tree is ever fixed
*/
- virq = irq_of_parse_and_map(pic->host->of_node, 0);
+ virq = irq_of_parse_and_map(of_node, 0);
if (virq)
return virq;
/* Now do the horrible hacks */
- tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
+ tmp = of_get_property(of_node, "#interrupt-cells", NULL);
if (tmp == NULL)
return NO_IRQ;
intsize = *tmp;
- imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen);
+ imap = of_get_property(of_node, "interrupt-map", &imaplen);
if (imap == NULL || imaplen < (intsize + 1))
return NO_IRQ;
iic = of_find_node_by_phandle(imap[intsize]);
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/file.c b/kernel/arch/powerpc/platforms/cell/spufs/file.c
index d966bbe58..5038fd578 100644
--- a/kernel/arch/powerpc/platforms/cell/spufs/file.c
+++ b/kernel/arch/powerpc/platforms/cell/spufs/file.c
@@ -239,23 +239,6 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long address = (unsigned long)vmf->virtual_address;
unsigned long pfn, offset;
-#ifdef CONFIG_SPU_FS_64K_LS
- struct spu_state *csa = &ctx->csa;
- int psize;
-
- /* Check what page size we are using */
- psize = get_slice_psize(vma->vm_mm, address);
-
- /* Some sanity checking */
- BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
-
- /* Wow, 64K, cool, we need to align the address though */
- if (csa->use_big_pages) {
- BUG_ON(vma->vm_start & 0xffff);
- address &= ~0xfffful;
- }
-#endif /* CONFIG_SPU_FS_64K_LS */
-
offset = vmf->pgoff << PAGE_SHIFT;
if (offset >= LS_SIZE)
return VM_FAULT_SIGBUS;
@@ -310,22 +293,6 @@ static const struct vm_operations_struct spufs_mem_mmap_vmops = {
static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
{
-#ifdef CONFIG_SPU_FS_64K_LS
- struct spu_context *ctx = file->private_data;
- struct spu_state *csa = &ctx->csa;
-
- /* Sanity check VMA alignment */
- if (csa->use_big_pages) {
- pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
- " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
- vma->vm_pgoff);
- if (vma->vm_start & 0xffff)
- return -EINVAL;
- if (vma->vm_pgoff & 0xf)
- return -EINVAL;
- }
-#endif /* CONFIG_SPU_FS_64K_LS */
-
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
@@ -336,25 +303,6 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-#ifdef CONFIG_SPU_FS_64K_LS
-static unsigned long spufs_get_unmapped_area(struct file *file,
- unsigned long addr, unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct spu_context *ctx = file->private_data;
- struct spu_state *csa = &ctx->csa;
-
- /* If not using big pages, fallback to normal MM g_u_a */
- if (!csa->use_big_pages)
- return current->mm->get_unmapped_area(file, addr, len,
- pgoff, flags);
-
- /* Else, try to obtain a 64K pages slice */
- return slice_get_unmapped_area(addr, len, flags,
- MMU_PAGE_64K, 1);
-}
-#endif /* CONFIG_SPU_FS_64K_LS */
-
static const struct file_operations spufs_mem_fops = {
.open = spufs_mem_open,
.release = spufs_mem_release,
@@ -362,9 +310,6 @@ static const struct file_operations spufs_mem_fops = {
.write = spufs_mem_write,
.llseek = generic_file_llseek,
.mmap = spufs_mem_mmap,
-#ifdef CONFIG_SPU_FS_64K_LS
- .get_unmapped_area = spufs_get_unmapped_area,
-#endif
};
static int spufs_ps_fault(struct vm_area_struct *vma,
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/inode.c b/kernel/arch/powerpc/platforms/cell/spufs/inode.c
index 1ba6307be..11634fa7a 100644
--- a/kernel/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/kernel/arch/powerpc/platforms/cell/spufs/inode.c
@@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir)
mutex_lock(&d_inode(dir)->i_mutex);
list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
spin_lock(&dentry->d_lock);
- if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) {
+ if (simple_positive(dentry)) {
dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
index 147069938..b847e9403 100644
--- a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -31,7 +31,7 @@
#include "spufs.h"
-static int spu_alloc_lscsa_std(struct spu_state *csa)
+int spu_alloc_lscsa(struct spu_state *csa)
{
struct spu_lscsa *lscsa;
unsigned char *p;
@@ -48,7 +48,7 @@ static int spu_alloc_lscsa_std(struct spu_state *csa)
return 0;
}
-static void spu_free_lscsa_std(struct spu_state *csa)
+void spu_free_lscsa(struct spu_state *csa)
{
/* Clear reserved bit before vfree. */
unsigned char *p;
@@ -61,123 +61,3 @@ static void spu_free_lscsa_std(struct spu_state *csa)
vfree(csa->lscsa);
}
-
-#ifdef CONFIG_SPU_FS_64K_LS
-
-#define SPU_64K_PAGE_SHIFT 16
-#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
-#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER)
-
-int spu_alloc_lscsa(struct spu_state *csa)
-{
- struct page **pgarray;
- unsigned char *p;
- int i, j, n_4k;
-
- /* Check availability of 64K pages */
- if (!spu_64k_pages_available())
- goto fail;
-
- csa->use_big_pages = 1;
-
- pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
- csa);
-
- /* First try to allocate our 64K pages. We need 5 of them
- * with the current implementation. In the future, we should try
- * to separate the lscsa with the actual local store image, thus
- * allowing us to require only 4 64K pages per context
- */
- for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
- /* XXX This is likely to fail, we should use a special pool
- * similar to what hugetlbfs does.
- */
- csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
- SPU_64K_PAGE_ORDER);
- if (csa->lscsa_pages[i] == NULL)
- goto fail;
- }
-
- pr_debug(" success ! creating vmap...\n");
-
- /* Now we need to create a vmalloc mapping of these for the kernel
- * and SPU context switch code to use. Currently, we stick to a
- * normal kernel vmalloc mapping, which in our case will be 4K
- */
- n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
- pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
- if (pgarray == NULL)
- goto fail;
- for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
- for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
- /* We assume all the struct page's are contiguous
- * which should be hopefully the case for an order 4
- * allocation..
- */
- pgarray[i * SPU_64K_PAGE_COUNT + j] =
- csa->lscsa_pages[i] + j;
- csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
- kfree(pgarray);
- if (csa->lscsa == NULL)
- goto fail;
-
- memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
-
- /* Set LS pages reserved to allow for user-space mapping.
- *
- * XXX isn't that a bit obsolete ? I think we should just
- * make sure the page count is high enough. Anyway, won't harm
- * for now
- */
- for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
- SetPageReserved(vmalloc_to_page(p));
-
- pr_debug(" all good !\n");
-
- return 0;
-fail:
- pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
- spu_free_lscsa(csa);
- return spu_alloc_lscsa_std(csa);
-}
-
-void spu_free_lscsa(struct spu_state *csa)
-{
- unsigned char *p;
- int i;
-
- if (!csa->use_big_pages) {
- spu_free_lscsa_std(csa);
- return;
- }
- csa->use_big_pages = 0;
-
- if (csa->lscsa == NULL)
- goto free_pages;
-
- for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
- ClearPageReserved(vmalloc_to_page(p));
-
- vunmap(csa->lscsa);
- csa->lscsa = NULL;
-
- free_pages:
-
- for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
- if (csa->lscsa_pages[i])
- __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
-}
-
-#else /* CONFIG_SPU_FS_64K_LS */
-
-int spu_alloc_lscsa(struct spu_state *csa)
-{
- return spu_alloc_lscsa_std(csa);
-}
-
-void spu_free_lscsa(struct spu_state *csa)
-{
- spu_free_lscsa_std(csa);
-}
-
-#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/kernel/arch/powerpc/platforms/chrp/setup.c b/kernel/arch/powerpc/platforms/chrp/setup.c
index 15ebc4e8a..987d1b8d6 100644
--- a/kernel/arch/powerpc/platforms/chrp/setup.c
+++ b/kernel/arch/powerpc/platforms/chrp/setup.c
@@ -363,7 +363,7 @@ void __init chrp_setup_arch(void)
if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0);
}
-static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void chrp_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
diff --git a/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 4cde8e7da..b7866e014 100644
--- a/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -108,7 +108,8 @@ static int flipper_pic_map(struct irq_domain *h, unsigned int virq,
return 0;
}
-static int flipper_pic_match(struct irq_domain *h, struct device_node *np)
+static int flipper_pic_match(struct irq_domain *h, struct device_node *np,
+ enum irq_domain_bus_token bus_token)
{
return 1;
}
diff --git a/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index c269caee5..9b7975706 100644
--- a/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -120,11 +120,10 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
return irq_linear_revmap(h, irq);
}
-static void hlwd_pic_irq_cascade(unsigned int cascade_virq,
- struct irq_desc *desc)
+static void hlwd_pic_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq);
+ struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
unsigned int virq;
raw_spin_lock(&desc->lock);
diff --git a/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c b/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c
index 161330317..8f65aa374 100644
--- a/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c
+++ b/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -42,7 +42,7 @@
static phys_addr_t pci_membase;
static u_char *restart;
-static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mvme5100_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
diff --git a/kernel/arch/powerpc/platforms/maple/Kconfig b/kernel/arch/powerpc/platforms/maple/Kconfig
index 1ea621a94..e359d0db0 100644
--- a/kernel/arch/powerpc/platforms/maple/Kconfig
+++ b/kernel/arch/powerpc/platforms/maple/Kconfig
@@ -1,5 +1,5 @@
config PPC_MAPLE
- depends on PPC64 && PPC_BOOK3S
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
bool "Maple 970FX Evaluation Board"
select PCI
select MPIC
diff --git a/kernel/arch/powerpc/platforms/pasemi/Kconfig b/kernel/arch/powerpc/platforms/pasemi/Kconfig
index a2aeb327d..00d4b28cb 100644
--- a/kernel/arch/powerpc/platforms/pasemi/Kconfig
+++ b/kernel/arch/powerpc/platforms/pasemi/Kconfig
@@ -1,5 +1,5 @@
config PPC_PASEMI
- depends on PPC64 && PPC_BOOK3S
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
bool "PA Semi SoC-based platforms"
default n
select MPIC
diff --git a/kernel/arch/powerpc/platforms/pasemi/Makefile b/kernel/arch/powerpc/platforms/pasemi/Makefile
index 8e8d4cae5..60b4e0fd9 100644
--- a/kernel/arch/powerpc/platforms/pasemi/Makefile
+++ b/kernel/arch/powerpc/platforms/pasemi/Makefile
@@ -1,2 +1,3 @@
obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o
+obj-$(CONFIG_PCI_MSI) += msi.o
diff --git a/kernel/arch/powerpc/platforms/pasemi/iommu.c b/kernel/arch/powerpc/platforms/pasemi/iommu.c
index b8f567b2e..c929644e7 100644
--- a/kernel/arch/powerpc/platforms/pasemi/iommu.c
+++ b/kernel/arch/powerpc/platforms/pasemi/iommu.c
@@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index,
}
}
+/* Table callbacks installed as iommu_table_iobmap.it_ops during setup. */
+static struct iommu_table_ops iommu_table_iobmap_ops = {
+	.set	= iobmap_build,
+	.clear	= iobmap_free,
+};
static void iommu_table_iobmap_setup(void)
{
@@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void)
* Should probably be 8 (64 bytes)
*/
iommu_table_iobmap.it_blocksize = 4;
+ iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops;
iommu_init_table(&iommu_table_iobmap, 0);
pr_debug(" <- %s\n", __func__);
}
@@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void)
pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
- ppc_md.tce_build = iobmap_build;
- ppc_md.tce_free = iobmap_free;
set_pci_dma_ops(&dma_iommu_ops);
}
diff --git a/kernel/arch/powerpc/platforms/pasemi/msi.c b/kernel/arch/powerpc/platforms/pasemi/msi.c
new file mode 100644
index 000000000..d9af76342
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/pasemi/msi.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2007, Olof Johansson, PA Semi
+ *
+ * Based on arch/powerpc/sysdev/mpic_u3msi.c:
+ *
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ */
+
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/prom.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+/* Allocate 16 interrupts per device, to give an alignment of 16,
+ * since that's the size of the grouping w.r.t. affinity. If someone
+ * needs more than 32 MSI's down the road we'll have to rethink this,
+ * but it should be OK for now.
+ */
+#define ALLOC_CHUNK 16
+
+#define PASEMI_MSI_ADDR 0xfc080000
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+
+/*
+ * irq_chip mask/shutdown hook: calls pci_msi_mask_irq() and then
+ * mpic_mask_irq() on the same irq_data.
+ */
+static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
+{
+	pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq);
+
+	pci_msi_mask_irq(data);
+	mpic_mask_irq(data);
+}
+
+/*
+ * irq_chip unmask hook: the reverse of the mask path, calling
+ * mpic_unmask_irq() first and pci_msi_unmask_irq() second.
+ */
+static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
+{
+	pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq);
+
+	mpic_unmask_irq(data);
+	pci_msi_unmask_irq(data);
+}
+
+/*
+ * irq_chip used for PA Semi MSI sources. Mask, unmask and shutdown go
+ * through the local wrappers above; the remaining hooks are the generic
+ * MPIC implementations.
+ */
+static struct irq_chip mpic_pasemi_msi_chip = {
+	.name			= "PASEMI-MSI",
+
+	/* Local wrappers (shutdown is the same operation as mask). */
+	.irq_mask		= mpic_pasemi_msi_mask_irq,
+	.irq_unmask		= mpic_pasemi_msi_unmask_irq,
+	.irq_shutdown		= mpic_pasemi_msi_mask_irq,
+
+	/* Generic MPIC handlers. */
+	.irq_eoi		= mpic_end_irq,
+	.irq_set_type		= mpic_set_irq_type,
+	.irq_set_affinity	= mpic_set_affinity,
+};
+
+/*
+ * Undo pasemi_msi_setup_msi_irqs() for @pdev.
+ *
+ * For every MSI descriptor of the device that has an irq assigned, detach
+ * the descriptor from the irq, dispose of the virq mapping and hand the
+ * ALLOC_CHUNK hardware irqs back to the MPIC MSI bitmap.
+ */
+static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *desc;
+
+	pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
+
+	for_each_pci_msi_entry(desc, pdev) {
+		irq_hw_number_t hw;
+
+		if (desc->irq != NO_IRQ) {
+			/* Look up the hwirq before the mapping goes away. */
+			hw = virq_to_hw(desc->irq);
+			irq_set_msi_desc(desc->irq, NULL);
+			irq_dispose_mapping(desc->irq);
+			msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hw,
+					       ALLOC_CHUNK);
+		}
+	}
+}
+
+/*
+ * Set up MSIs for @pdev.
+ *
+ * For each MSI descriptor of the device a block of ALLOC_CHUNK hardware
+ * irqs is taken from the MPIC MSI bitmap, the first one is mapped to a
+ * virq, and the MSI message is written to the device.
+ *
+ * Returns 0 on success, the (negative) allocator result if no hardware
+ * irqs are available, or -ENOSPC if the virq mapping cannot be created.
+ */
+static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	unsigned int virq;
+	int hwirq;
+
+	if (type == PCI_CAP_ID_MSIX)
+		pr_debug("pasemi_msi: MSI-X untested, trying anyway\n");
+	pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n",
+		 pdev, nvec, type);
+
+	/* The target address is the same for every message; only data varies. */
+	msg.address_lo = PASEMI_MSI_ADDR;
+	msg.address_hi = 0;
+
+	for_each_pci_msi_entry(entry, pdev) {
+		/*
+		 * Take a whole ALLOC_CHUNK block per entry, since that is
+		 * the grouping used for affinity.
+		 */
+		hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap,
+						ALLOC_CHUNK);
+		if (hwirq < 0) {
+			pr_debug("pasemi_msi: failed allocating hwirq\n");
+			return hwirq;
+		}
+
+		virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+		if (virq == NO_IRQ) {
+			pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
+				 hwirq);
+			/* Give the block back before bailing out. */
+			msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
+					       ALLOC_CHUNK);
+			return -ENOSPC;
+		}
+
+		/*
+		 * The vector on MSI is really an offset: the hardware adds
+		 * it to the value written at the magic address, so keep it
+		 * at 0 to remain sane.
+		 */
+		mpic_set_vector(virq, 0);
+
+		irq_set_msi_desc(virq, entry);
+		irq_set_chip(virq, &mpic_pasemi_msi_chip);
+		irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+		pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) addr 0x%x\n",
+			 virq, hwirq, msg.address_lo);
+
+		/*
+		 * Likewise, the device writes [0...511] into the target
+		 * register to generate MSI [512...1023].
+		 */
+		msg.data = hwirq - 0x200;
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * Hook PA Semi MSI support up to @mpic.
+ *
+ * Returns -ENODEV unless the MPIC's irq domain has a device node that is
+ * compatible with "pasemi,pwrficient-openpic", the allocator's error code
+ * if the MSI bitmap cannot be set up, and 0 once the setup/teardown
+ * callbacks have been installed on every PHB in hose_list.
+ */
+int mpic_pasemi_msi_init(struct mpic *mpic)
+{
+	struct device_node *np = irq_domain_get_of_node(mpic->irqhost);
+	struct pci_controller *hose;
+	int rc;
+
+	if (!np)
+		return -ENODEV;
+	if (!of_device_is_compatible(np, "pasemi,pwrficient-openpic"))
+		return -ENODEV;
+
+	rc = mpic_msi_init_allocator(mpic);
+	if (rc) {
+		pr_debug("pasemi_msi: Error allocating bitmap!\n");
+		return rc;
+	}
+
+	pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n");
+
+	/* Remember the MPIC; the setup/teardown callbacks rely on it. */
+	msi_mpic = mpic;
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/* Complain if a setup callback is already installed. */
+		WARN_ON(hose->controller_ops.setup_msi_irqs);
+		hose->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs;
+		hose->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs;
+	}
+
+	return 0;
+}
diff --git a/kernel/arch/powerpc/platforms/powermac/Kconfig b/kernel/arch/powerpc/platforms/powermac/Kconfig
index 607124bae..43c606268 100644
--- a/kernel/arch/powerpc/platforms/powermac/Kconfig
+++ b/kernel/arch/powerpc/platforms/powermac/Kconfig
@@ -1,6 +1,6 @@
config PPC_PMAC
bool "Apple PowerMac based machines"
- depends on PPC_BOOK3S
+ depends on PPC_BOOK3S && CPU_BIG_ENDIAN
select MPIC
select PCI
select PPC_INDIRECT_PCI if PPC32
diff --git a/kernel/arch/powerpc/platforms/powermac/pic.c b/kernel/arch/powerpc/platforms/powermac/pic.c
index 59cfc9d63..6f4f8b060 100644
--- a/kernel/arch/powerpc/platforms/powermac/pic.c
+++ b/kernel/arch/powerpc/platforms/powermac/pic.c
@@ -268,7 +268,8 @@ static struct irqaction gatwick_cascade_action = {
.name = "cascade",
};
-static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node)
+static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
{
/* We match all, we don't always have a node anyway */
return 1;
diff --git a/kernel/arch/powerpc/platforms/powernv/Kconfig b/kernel/arch/powerpc/platforms/powernv/Kconfig
index 4b044d8cb..604190cab 100644
--- a/kernel/arch/powerpc/platforms/powernv/Kconfig
+++ b/kernel/arch/powerpc/platforms/powernv/Kconfig
@@ -19,3 +19,10 @@ config PPC_POWERNV
select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
default y
+
+config OPAL_PRD
+ tristate 'OPAL PRD driver'
+ depends on PPC_POWERNV
+ help
+ This enables the opal-prd driver, a facility to run processor
+ recovery diagnostics on OpenPower machines
diff --git a/kernel/arch/powerpc/platforms/powernv/Makefile b/kernel/arch/powerpc/platforms/powernv/Makefile
index 33e44f372..b9de7ef48 100644
--- a/kernel/arch/powerpc/platforms/powernv/Makefile
+++ b/kernel/arch/powerpc/platforms/powernv/Makefile
@@ -1,7 +1,8 @@
-obj-y += setup.o opal-wrappers.o opal.o opal-async.o
+obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
-obj-y += opal-msglog.o opal-hmi.o opal-power.o
+obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o
@@ -9,3 +10,4 @@ obj-$(CONFIG_EEH) += eeh-powernv.o
obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
+obj-$(CONFIG_OPAL_PRD) += opal-prd.o
diff --git a/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c b/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c
index ce738ab3d..2ba602591 100644
--- a/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/msi.h>
#include <linux/of.h>
@@ -40,18 +41,13 @@
#include "pci.h"
static bool pnv_eeh_nb_init = false;
+static int eeh_event_irq = -EINVAL;
-/**
- * pnv_eeh_init - EEH platform dependent initialization
- *
- * EEH platform dependent initialization on powernv
- */
static int pnv_eeh_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
- /* We require OPALv3 */
if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
pr_warn("%s: OPALv3 is required !\n",
__func__);
@@ -75,9 +71,9 @@ static int pnv_eeh_init(void)
/*
* PE#0 should be regarded as valid by EEH core
* if it's not the reserved one. Currently, we
- * have the reserved PE#0 and PE#127 for PHB3
+ * have the reserved PE#255 and PE#127 for PHB3
* and P7IOC separately. So we should regard
- * PE#0 as valid for P7IOC.
+ * PE#0 as valid for PHB3 and P7IOC.
*/
if (phb->ioda.reserved_pe != 0)
eeh_add_flag(EEH_VALID_PE_ZERO);
@@ -88,34 +84,22 @@ static int pnv_eeh_init(void)
return 0;
}
-static int pnv_eeh_event(struct notifier_block *nb,
- unsigned long events, void *change)
+static irqreturn_t pnv_eeh_event(int irq, void *data)
{
- uint64_t changed_evts = (uint64_t)change;
-
/*
- * We simply send special EEH event if EEH has
- * been enabled, or clear pending events in
- * case that we enable EEH soon
+ * We simply send a special EEH event if EEH has been
+ * enabled. We don't care about EEH events until we've
+ * finished processing the outstanding ones. Event processing
+ * gets unmasked in next_error() if EEH is enabled.
*/
- if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
- !(events & OPAL_EVENT_PCI_ERROR))
- return 0;
+ disable_irq_nosync(irq);
if (eeh_enabled())
eeh_send_failure_event(NULL);
- else
- opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
- return 0;
+ return IRQ_HANDLED;
}
-static struct notifier_block pnv_eeh_nb = {
- .notifier_call = pnv_eeh_event,
- .next = NULL,
- .priority = 0
-};
-
#ifdef CONFIG_DEBUG_FS
static ssize_t pnv_eeh_ei_write(struct file *filp,
const char __user *user_buf,
@@ -237,16 +221,28 @@ static int pnv_eeh_post_init(void)
/* Register OPAL event notifier */
if (!pnv_eeh_nb_init) {
- ret = opal_notifier_register(&pnv_eeh_nb);
- if (ret) {
- pr_warn("%s: Can't register OPAL event notifier (%d)\n",
- __func__, ret);
+ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+ if (eeh_event_irq < 0) {
+ pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return eeh_event_irq;
+ }
+
+ ret = request_irq(eeh_event_irq, pnv_eeh_event,
+ IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+ if (ret < 0) {
+ irq_dispose_mapping(eeh_event_irq);
+ pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
return ret;
}
pnv_eeh_nb_init = true;
}
+ if (!eeh_enabled())
+ disable_irq(eeh_event_irq);
+
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
@@ -282,33 +278,23 @@ static int pnv_eeh_post_init(void)
#endif /* CONFIG_DEBUG_FS */
}
-
return ret;
}
-static int pnv_eeh_cap_start(struct pci_dn *pdn)
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
{
- u32 status;
+ int pos = PCI_CAPABILITY_LIST;
+ int cnt = 48; /* Maximal number of capabilities */
+ u32 status, id;
if (!pdn)
return 0;
+ /* Check if the device supports capabilities */
pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
if (!(status & PCI_STATUS_CAP_LIST))
return 0;
- return PCI_CAPABILITY_LIST;
-}
-
-static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
-{
- int pos = pnv_eeh_cap_start(pdn);
- int cnt = 48; /* Maximal number of capabilities */
- u32 id;
-
- if (!pos)
- return 0;
-
while (cnt--) {
pnv_pci_cfg_read(pdn, pos, 1, &pos);
if (pos < 0x40)
@@ -441,11 +427,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* that PE to block its config space.
*
* Broadcom Austin 4-ports NICs (14e4:1657)
+ * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
* Broadcom Shiner 2-ports 10G NICs (14e4:168e)
*/
if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
pdn->device_id == 0x1657) ||
(pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+ pdn->device_id == 0x168a) ||
+ (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
pdn->device_id == 0x168e))
edev->pe->state |= EEH_PE_CFG_RESTRICTED;
@@ -455,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
* PCI devices of the PE are expected to be removed prior
* to PE reset.
*/
- if (!edev->pe->bus)
+ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
edev->pe->bus = pci_find_bus(hose->global_number,
pdn->busno);
+ if (edev->pe->bus)
+ edev->pe->state |= EEH_PE_PRI_BUS;
+ }
/*
* Enable EEH explicitly so that we will do EEH check
@@ -485,10 +477,9 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
struct pci_controller *hose = pe->phb;
struct pnv_phb *phb = hose->private_data;
bool freeze_pe = false;
- int opt, ret = 0;
+ int opt;
s64 rc;
- /* Sanity check on option */
switch (option) {
case EEH_OPT_DISABLE:
return -EPERM;
@@ -509,38 +500,37 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
return -EINVAL;
}
- /* If PHB supports compound PE, to handle it */
+ /* Freeze master and slave PEs if PHB supports compound PEs */
if (freeze_pe) {
if (phb->freeze_pe) {
phb->freeze_pe(phb, pe->addr);
- } else {
- rc = opal_pci_eeh_freeze_set(phb->opal_id,
- pe->addr, opt);
- if (rc != OPAL_SUCCESS) {
- pr_warn("%s: Failure %lld freezing "
- "PHB#%x-PE#%x\n",
- __func__, rc,
- phb->hose->global_number, pe->addr);
- ret = -EIO;
- }
+ return 0;
}
- } else {
- if (phb->unfreeze_pe) {
- ret = phb->unfreeze_pe(phb, pe->addr, opt);
- } else {
- rc = opal_pci_eeh_freeze_clear(phb->opal_id,
- pe->addr, opt);
- if (rc != OPAL_SUCCESS) {
- pr_warn("%s: Failure %lld enable %d "
- "for PHB#%x-PE#%x\n",
- __func__, rc, option,
- phb->hose->global_number, pe->addr);
- ret = -EIO;
- }
+
+ rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+ __func__, rc, phb->hose->global_number,
+ pe->addr);
+ return -EIO;
}
+
+ return 0;
}
- return ret;
+ /* Unfreeze master and slave PEs if PHB supports compound PEs */
+ if (phb->unfreeze_pe)
+ return phb->unfreeze_pe(phb, pe->addr, opt);
+
+ rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+ __func__, rc, option, phb->hose->global_number,
+ pe->addr);
+ return -EIO;
+ }
+
+ return 0;
}
/**
@@ -979,7 +969,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
/**
* pnv_eeh_wait_state - Wait for PE state
* @pe: EEH PE
- * @max_wait: maximal period in microsecond
+ * @max_wait: maximal period in millisecond
*
* Wait for the state of associated PE. It might take some time
* to retrieve the PE's state.
@@ -1000,13 +990,13 @@ static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
if (ret != EEH_STATE_UNAVAILABLE)
return ret;
- max_wait -= mwait;
if (max_wait <= 0) {
pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
__func__, pe->addr, max_wait);
return EEH_STATE_NOT_SUPPORT;
}
+ max_wait -= mwait;
msleep(mwait);
}
@@ -1063,7 +1053,6 @@ static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
struct pnv_phb *phb = hose->private_data;
s64 rc;
- /* Sanity check on error type */
if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
pr_warn("%s: Invalid error type %d\n",
@@ -1303,12 +1292,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
int state, ret = EEH_NEXT_ERR_NONE;
/*
- * While running here, it's safe to purge the event queue.
- * And we should keep the cached OPAL notifier event sychronized
- * between the kernel and firmware.
+ * While running here, it's safe to purge the event queue. The
+ * event should still be masked.
*/
eeh_remove_event(NULL, false);
- opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
list_for_each_entry(hose, &hose_list, list_node) {
/*
@@ -1394,11 +1381,19 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
*/
if (pnv_eeh_get_pe(hose,
be64_to_cpu(frozen_pe_no), pe)) {
- /* Try best to clear it */
pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
- hose->global_number, frozen_pe_no);
+ hose->global_number, be64_to_cpu(frozen_pe_no));
pr_info("EEH: PHB location: %s\n",
eeh_pe_loc_get(phb_pe));
+
+ /* Dump PHB diag-data */
+ rc = opal_pci_get_phb_diag_data2(phb->opal_id,
+ phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+ if (rc == OPAL_SUCCESS)
+ pnv_pci_dump_phb_diag_data(hose,
+ phb->diag.blob);
+
+ /* Try best to clear it */
opal_pci_eeh_freeze_clear(phb->opal_id,
frozen_pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
@@ -1477,6 +1472,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
break;
}
+ /* Unmask the event */
+ if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
+ enable_irq(eeh_event_irq);
+
return ret;
}
diff --git a/kernel/arch/powerpc/platforms/powernv/idle.c b/kernel/arch/powerpc/platforms/powernv/idle.c
new file mode 100644
index 000000000..59d735d2e
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/powernv/idle.c
@@ -0,0 +1,293 @@
+/*
+ * PowerNV cpuidle code
+ *
+ * Copyright 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
+#include <asm/smp.h>
+
+#include "powernv.h"
+#include "subcore.h"
+
+static u32 supported_cpuidle_states;
+
+/*
+ * Register with OPAL (opal_slw_set_reg) the SPR values to be restored for
+ * every possible cpu: HSPRG0 and LPCR per thread, and HMEER/HID0/HID1/
+ * HID4/HID5 once per core (on thread 0).
+ *
+ * Returns 0 on success or the first non-zero opal_slw_set_reg() result.
+ */
+int pnv_save_sprs_for_winkle(void)
+{
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot. Read them on the current cpu and use the same
+	 * values for all cpus.
+	 */
+	uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+	uint64_t hid0_val = mfspr(SPRN_HID0);
+	uint64_t hid1_val = mfspr(SPRN_HID1);
+	uint64_t hid4_val = mfspr(SPRN_HID4);
+	uint64_t hid5_val = mfspr(SPRN_HID5);
+	uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+	/* Per-core registers, in the order they are handed to OPAL. */
+	const struct {
+		uint64_t sprn;
+		uint64_t val;
+	} per_core[] = {
+		{ SPRN_HMEER, hmeer_val },
+		{ SPRN_HID0, hid0_val },
+		{ SPRN_HID1, hid1_val },
+		{ SPRN_HID4, hid4_val },
+		{ SPRN_HID5, hid5_val },
+	};
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		/*
+		 * HSPRG0 is used to store the cpu's pointer to paca, so its
+		 * last 3 bits are guaranteed to be 0. Program slw to restore
+		 * HSPRG0 with the 63rd bit set, so that a thread waking up
+		 * at 0x100 can use this bit to distinguish between fastsleep
+		 * and deep winkle.
+		 */
+		uint64_t hsprg0_val = (uint64_t)&paca[cpu] | 1;
+		unsigned int i;
+		int rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+		if (rc != 0)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+		if (rc != 0)
+			return rc;
+
+		/* HIDs are per core registers: only thread 0 programs them. */
+		if (cpu_thread_in_core(cpu) != 0)
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(per_core); i++) {
+			rc = opal_slw_set_reg(pir, per_core[i].sprn,
+					      per_core[i].val);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate one core_idle_state word per core and point every thread's paca
+ * at it, then refresh the subcore sibling mask and, if winkle is among the
+ * supported idle states, register the SPR values needed for winkle.
+ *
+ * NOTE(review): the kmalloc_node() result is dereferenced without a NULL
+ * check and the pnv_save_sprs_for_winkle() return value is ignored.
+ */
+static void pnv_alloc_idle_core_states(void)
+{
+	int nr_cores = cpu_nr_cores();
+	int core, thread;
+
+	/*
+	 * core_idle_state - First 8 bits track the idle state of each thread
+	 * of the core. The 8th bit is the lock bit. Initially all thread bits
+	 * are set. They are cleared when the thread enters deep idle state
+	 * like sleep and winkle. Initially the lock bit is cleared.
+	 * The lock bit has 2 purposes
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (core = 0; core < nr_cores; core++) {
+		int first_cpu = core * threads_per_core;
+		u32 *state;
+
+		/* Allocate on the node of the core's first thread. */
+		state = kmalloc_node(sizeof(u32), GFP_KERNEL,
+				     cpu_to_node(first_cpu));
+		*state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (thread = 0; thread < threads_per_core; thread++) {
+			int cpu = first_cpu + thread;
+
+			paca[cpu].core_idle_state_ptr = state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << thread;
+		}
+	}
+
+	update_subcore_sibling_mask();
+
+	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+		pnv_save_sprs_for_winkle();
+}
+
+/*
+ * Return the supported_cpuidle_states mask: the OR of all
+ * "ibm,cpu-idle-state-flags" entries collected by pnv_init_idle_states(),
+ * or 0 if none were collected.
+ */
+u32 pnv_get_supported_cpuidle_states(void)
+{
+ return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+
+/*
+ * Cross-call helper run through on_each_cpu_mask(): asks OPAL to apply
+ * the fastsleep workaround.
+ *
+ * @info: points to an int that is set to 1 if OPAL reports a failure and
+ *        is left untouched otherwise.
+ */
+static void pnv_fastsleep_workaround_apply(void *info)
+{
+	int *failed = info;
+	int rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+					    OPAL_CONFIG_IDLE_APPLY);
+
+	if (rc != 0)
+		*failed = 1;
+}
+
+/*
+ * Used to store fastsleep workaround state
+ * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
+ * 1 - Workaround applied once, never undone.
+ */
+static u8 fastsleep_workaround_applyonce;
+
+/*
+ * sysfs show handler: prints the current fastsleep_workaround_applyonce
+ * value (0 or 1) followed by a newline and returns the number of bytes
+ * written to @buf.
+ */
+static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
+}
+
+/*
+ * sysfs store handler for fastsleep_workaround_applyonce.
+ *
+ * Only the value 1 is accepted; any other input returns -EINVAL. Writing
+ * 1 when apply-once mode is already active is a no-op. Returns @count on
+ * success and -EIO if instruction patching or applying the workaround
+ * fails (steps completed before the failure are not rolled back).
+ */
+static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	cpumask_t primary_thread_mask;
+	int err;
+	u8 val;
+
+	if (kstrtou8(buf, 0, &val) || val != 1)
+		return -EINVAL;
+
+	if (fastsleep_workaround_applyonce == 1)
+		return count;
+
+	/*
+	 * fastsleep_workaround_applyonce = 1 implies
+	 * fastsleep workaround needs to be left in 'applied' state on all
+	 * the cores. Do this by:
+	 * 1. Patching out the call to 'undo' workaround in fastsleep exit path
+	 * 2. Sending ipi to all the cores which have at least one online thread
+	 * 3. Patching out the call to 'apply' workaround in fastsleep entry
+	 * path
+	 * There is no need to send ipi to cores which have all threads
+	 * offlined, as last thread of the core entering fastsleep or deeper
+	 * state would have applied workaround.
+	 */
+	err = patch_instruction(
+		(unsigned int *)pnv_fastsleep_workaround_at_exit,
+		PPC_INST_NOP);
+	if (err) {
+		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit\n");
+		return -EIO;
+	}
+
+	/* err is 0 here; the cross-call helper sets it to 1 on failure. */
+	get_online_cpus();
+	primary_thread_mask = cpu_online_cores_map();
+	on_each_cpu_mask(&primary_thread_mask,
+			 pnv_fastsleep_workaround_apply,
+			 &err, 1);
+	put_online_cpus();
+	if (err) {
+		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
+		return -EIO;
+	}
+
+	err = patch_instruction(
+		(unsigned int *)pnv_fastsleep_workaround_at_entry,
+		PPC_INST_NOP);
+	if (err) {
+		pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry\n");
+		return -EIO;
+	}
+
+	fastsleep_workaround_applyonce = 1;
+
+	return count;
+}
+
+static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
+ show_fastsleep_workaround_applyonce,
+ store_fastsleep_workaround_applyonce);
+
+/*
+ * Discover the cpu idle states advertised in the device tree and set up
+ * the per-core idle bookkeeping.
+ *
+ * supported_cpuidle_states becomes the OR of all
+ * "ibm,cpu-idle-state-flags" entries under /ibm,opal/power-mgt. It stays
+ * 0 when cpuidle is overridden, OPALv3 is absent, or the node/property is
+ * missing. Depending on OPAL_PM_SLEEP_ENABLED_ER1 the fastsleep workaround
+ * hooks are either patched out or made controllable through sysfs.
+ *
+ * Always returns 0 so that a missing or malformed property does not fail
+ * the initcall.
+ */
+static int __init pnv_init_idle_states(void)
+{
+	struct device_node *power_mgt;
+	int dt_idle_states;
+	u32 *flags;
+	int i;
+
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		goto out;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		goto out;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		goto out;
+	}
+	dt_idle_states = of_property_count_u32_elems(power_mgt,
+			"ibm,cpu-idle-state-flags");
+	if (dt_idle_states < 0) {
+		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+		goto out_put_node;
+	}
+
+	/* kcalloc() zeroes the array and guards the size multiplication. */
+	flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL);
+	if (!flags)
+		goto out_put_node;
+
+	if (of_property_read_u32_array(power_mgt,
+			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+		goto out_free;
+	}
+
+	for (i = 0; i < dt_idle_states; i++)
+		supported_cpuidle_states |= flags[i];
+
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		/* No workaround needed: turn both hook sites into nops. */
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	} else {
+		/*
+		 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+		 * workaround is needed to use fastsleep. Provide sysfs
+		 * control to choose how this workaround has to be applied.
+		 */
+		device_create_file(cpu_subsys.dev_root,
+				&dev_attr_fastsleep_workaround_applyonce);
+	}
+
+	pnv_alloc_idle_core_states();
+out_free:
+	kfree(flags);
+out_put_node:
+	/* Drop the reference taken by of_find_node_by_path(). */
+	of_node_put(power_mgt);
+out:
+	return 0;
+}
+machine_subsys_initcall(powernv, pnv_init_idle_states);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-async.c b/kernel/arch/powerpc/platforms/powernv/opal-async.c
index 693b6cdac..bdc8c0c71 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-async.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-async.c
@@ -151,7 +151,7 @@ static struct notifier_block opal_async_comp_nb = {
.priority = 0,
};
-static int __init opal_async_comp_init(void)
+int __init opal_async_comp_init(void)
{
struct device_node *opal_node;
const __be32 *async;
@@ -205,4 +205,3 @@ out_opal_node:
out:
return err;
}
-machine_subsys_initcall(powernv, opal_async_comp_init);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-dump.c b/kernel/arch/powerpc/platforms/powernv/opal-dump.c
index 5aa9c1ce4..2ee96431f 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-dump.c
@@ -15,6 +15,7 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/delay.h>
+#include <linux/interrupt.h>
#include <asm/opal.h>
@@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj,
struct dump_attribute *attr,
char *buf)
{
-
+
return sprintf(buf, "0x%x %s\n", dump_obj->type,
dump_type_to_string(dump_obj->type));
}
@@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
return dump;
}
-static int process_dump(void)
+static irqreturn_t process_dump(int irq, void *data)
{
int rc;
uint32_t dump_id, dump_size, dump_type;
@@ -387,45 +388,13 @@ static int process_dump(void)
if (!dump)
return -1;
- return 0;
-}
-
-static void dump_work_fn(struct work_struct *work)
-{
- process_dump();
+ return IRQ_HANDLED;
}
-static DECLARE_WORK(dump_work, dump_work_fn);
-
-static void schedule_process_dump(void)
-{
- schedule_work(&dump_work);
-}
-
-/*
- * New dump available notification
- *
- * Once we get notification, we add sysfs entries for it.
- * We only fetch the dump on demand, and create sysfs asynchronously.
- */
-static int dump_event(struct notifier_block *nb,
- unsigned long events, void *change)
-{
- if (events & OPAL_EVENT_DUMP_AVAIL)
- schedule_process_dump();
-
- return 0;
-}
-
-static struct notifier_block dump_nb = {
- .notifier_call = dump_event,
- .next = NULL,
- .priority = 0
-};
-
void __init opal_platform_dump_init(void)
{
int rc;
+ int dump_irq;
/* ELOG not supported by firmware */
if (!opal_check_token(OPAL_DUMP_READ))
@@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void)
return;
}
- rc = opal_notifier_register(&dump_nb);
+ dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
+ if (!dump_irq) {
+ pr_err("%s: Can't register OPAL event irq (%d)\n",
+ __func__, dump_irq);
+ return;
+ }
+
+ rc = request_threaded_irq(dump_irq, NULL, process_dump,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+ "opal-dump", NULL);
if (rc) {
- pr_warn("%s: Can't register OPAL event notifier (%d)\n",
- __func__, rc);
+ pr_err("%s: Can't request OPAL event irq (%d)\n",
+ __func__, rc);
return;
}
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-elog.c b/kernel/arch/powerpc/platforms/powernv/opal-elog.c
index 38ce757e5..37f959bf3 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-elog.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
@@ -236,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
return elog;
}
-static void elog_work_fn(struct work_struct *work)
+static irqreturn_t elog_event(int irq, void *data)
{
__be64 size;
__be64 id;
@@ -250,7 +251,7 @@ static void elog_work_fn(struct work_struct *work)
rc = opal_get_elog_size(&id, &size, &type);
if (rc != OPAL_SUCCESS) {
pr_err("ELOG: OPAL log info read failed\n");
- return;
+ return IRQ_HANDLED;
}
elog_size = be64_to_cpu(size);
@@ -269,31 +270,16 @@ static void elog_work_fn(struct work_struct *work)
* entries.
*/
if (kset_find_obj(elog_kset, name))
- return;
+ return IRQ_HANDLED;
create_elog_obj(log_id, elog_size, elog_type);
-}
-
-static DECLARE_WORK(elog_work, elog_work_fn);
-static int elog_event(struct notifier_block *nb,
- unsigned long events, void *change)
-{
- /* check for error log event */
- if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
- schedule_work(&elog_work);
- return 0;
+ return IRQ_HANDLED;
}
-static struct notifier_block elog_nb = {
- .notifier_call = elog_event,
- .next = NULL,
- .priority = 0
-};
-
int __init opal_elog_init(void)
{
- int rc = 0;
+ int rc = 0, irq;
/* ELOG not supported by firmware */
if (!opal_check_token(OPAL_ELOG_READ))
@@ -305,10 +291,18 @@ int __init opal_elog_init(void)
return -1;
}
- rc = opal_notifier_register(&elog_nb);
+ irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
+ if (!irq) {
+ pr_err("%s: Can't register OPAL event irq (%d)\n",
+ __func__, irq);
+ return irq;
+ }
+
+ rc = request_threaded_irq(irq, NULL, elog_event,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
if (rc) {
- pr_err("%s: Can't register OPAL event notifier (%d)\n",
- __func__, rc);
+ pr_err("%s: Can't request OPAL event irq (%d)\n",
+ __func__, rc);
return rc;
}
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-hmi.c b/kernel/arch/powerpc/platforms/powernv/opal-hmi.c
index b322bfb51..d000f4e21 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -35,9 +35,134 @@ struct OpalHmiEvtNode {
struct list_head list;
struct OpalHMIEvent hmi_evt;
};
+
+/* One row of a checkstop-reason lookup table. */
+struct xstop_reason {
+ uint32_t xstop_reason; /* bit mask tested against the event's reason word */
+ const char *unit_failed; /* short unit name printed as "[Unit: ...]" */
+ const char *description; /* human-readable text printed after the unit */
+};
+
static LIST_HEAD(opal_hmi_evt_list);
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
+/*
+ * Print the decoded reason(s) for a core checkstop.
+ *
+ * @level:   printk level prefix prepended to every line
+ * @hmi_evt: HMI event whose u.xstop_error payload is decoded
+ *
+ * Prints the CPU PIR followed by one line per reason bit set in the
+ * event; a zero reason word is reported as an unknown core checkstop.
+ */
+static void print_core_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	static const struct xstop_reason core_reasons[] = {
+		{ CORE_CHECKSTOP_IFU_REGFILE,
+		  "IFU", "RegFile core check stop" },
+		{ CORE_CHECKSTOP_IFU_LOGIC,
+		  "IFU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_DURING_RECOV,
+		  "PC", "Core checkstop during recovery" },
+		{ CORE_CHECKSTOP_ISU_REGFILE,
+		  "ISU", "RegFile core check stop (mapper error)" },
+		{ CORE_CHECKSTOP_ISU_LOGIC,
+		  "ISU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_FXU_LOGIC,
+		  "FXU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_VSU_LOGIC,
+		  "VSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE,
+		  "PC", "Recovery in maintenance mode" },
+		{ CORE_CHECKSTOP_LSU_REGFILE,
+		  "LSU", "RegFile core check stop" },
+		{ CORE_CHECKSTOP_PC_FWD_PROGRESS,
+		  "PC", "Forward Progress Error" },
+		{ CORE_CHECKSTOP_LSU_LOGIC,
+		  "LSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_LOGIC,
+		  "PC", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_HYP_RESOURCE,
+		  "PC", "Hypervisor Resource error - core check stop" },
+		{ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED,
+		  "PC", "Hang Recovery Failed (core check stop)" },
+		{ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED,
+		  "PC", "Ambiguous Hang Detected (unknown source)" },
+		{ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ,
+		  "PC", "Debug Trigger Error inject" },
+		{ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ,
+		  "PC", "Hypervisor check stop via SPRC/SPRD" },
+	};
+	const struct xstop_reason *entry;
+	uint32_t reason;
+	int idx;
+
+	/* A zero reason word carries no decodable information. */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown Core check stop.\n", level);
+		return;
+	}
+
+	printk("%s CPU PIR: %08x\n", level,
+	       be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
+
+	/* Convert once; several reason bits may be set at the same time. */
+	reason = be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason);
+	for (idx = 0; idx < ARRAY_SIZE(core_reasons); idx++) {
+		entry = &core_reasons[idx];
+		if (!(reason & entry->xstop_reason))
+			continue;
+		printk("%s [Unit: %-3s] %s\n", level,
+		       entry->unit_failed, entry->description);
+	}
+}
+
+/*
+ * Print the decoded reason(s) for an NX checkstop.
+ *
+ * @level:   printk level prefix prepended to every line
+ * @hmi_evt: HMI event whose u.xstop_error payload is decoded
+ *
+ * Prints the chip id followed by one line per reason bit set in the
+ * event; a zero reason word is reported as an unknown NX checkstop.
+ */
+static void print_nx_checkstop_reason(const char *level,
+				      struct OpalHMIEvent *hmi_evt)
+{
+	static const struct xstop_reason nx_reasons[] = {
+		{ NX_CHECKSTOP_SHM_INVAL_STATE_ERR,
+		  "DMA & Engine", "SHM invalid state error" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1,
+		  "DMA & Engine", "DMA invalid state error bit 15" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2,
+		  "DMA & Engine", "DMA invalid state error bit 16" },
+		{ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 0 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 1 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 2 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 3 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 4 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 5 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 6 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR,
+		  "DMA & Engine", "Channel 7 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CRB_UE,
+		  "DMA & Engine", "UE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_DMA_CRB_SUE,
+		  "DMA & Engine", "SUE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_PBI_ISN_UE,
+		  "PowerBus Interface",
+		  "CRB Kill ISN received while holding ISN with UE error" },
+	};
+	const struct xstop_reason *entry;
+	uint32_t reason;
+	int idx;
+
+	/* A zero reason word carries no decodable information. */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown NX check stop.\n", level);
+		return;
+	}
+
+	printk("%s NX checkstop on CHIP ID: %x\n", level,
+	       be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+
+	/* Convert once; several reason bits may be set at the same time. */
+	reason = be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason);
+	for (idx = 0; idx < ARRAY_SIZE(nx_reasons); idx++) {
+		entry = &nx_reasons[idx];
+		if (!(reason & entry->xstop_reason))
+			continue;
+		printk("%s [Unit: %-3s] %s\n", level,
+		       entry->unit_failed, entry->description);
+	}
+}
+
+/*
+ * Dispatch a malfunction-alert event to the decoder matching its
+ * checkstop type. Types other than core, NX and unknown print nothing.
+ */
+static void print_checkstop_reason(const char *level,
+				   struct OpalHMIEvent *hmi_evt)
+{
+	if (hmi_evt->u.xstop_error.xstop_type == CHECKSTOP_TYPE_CORE)
+		print_core_checkstop_reason(level, hmi_evt);
+	else if (hmi_evt->u.xstop_error.xstop_type == CHECKSTOP_TYPE_NX)
+		print_nx_checkstop_reason(level, hmi_evt);
+	else if (hmi_evt->u.xstop_error.xstop_type == CHECKSTOP_TYPE_UNKNOWN)
+		printk("%s Unknown Malfunction Alert.\n", level);
+}
+
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
{
const char *level, *sevstr, *error_info;
@@ -95,6 +220,13 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
printk("%s TFMR: %016llx\n", level,
be64_to_cpu(hmi_evt->tfmr));
+
+ if (hmi_evt->version < OpalHMIEvt_V2)
+ return;
+
+ /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
+ if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
+ print_checkstop_reason(level, hmi_evt);
}
static void hmi_event_handler(struct work_struct *work)
@@ -103,6 +235,8 @@ static void hmi_event_handler(struct work_struct *work)
struct OpalHMIEvent *hmi_evt;
struct OpalHmiEvtNode *msg_node;
uint8_t disposition;
+ struct opal_msg msg;
+ int unrecoverable = 0;
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
while (!list_empty(&opal_hmi_evt_list)) {
@@ -118,14 +252,53 @@ static void hmi_event_handler(struct work_struct *work)
/*
* Check if HMI event has been recovered or not. If not
- * then we can't continue, invoke panic.
+ * then kernel can't continue, we need to panic.
+ * But before we do that, display all the HMI event
+ * available on the list and set unrecoverable flag to 1.
*/
if (disposition != OpalHMI_DISPOSITION_RECOVERED)
- panic("Unrecoverable HMI exception");
+ unrecoverable = 1;
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
}
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+ if (unrecoverable) {
+ int ret;
+
+ /* Pull all HMI events from OPAL before we panic. */
+ while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
+ u32 type;
+
+ type = be32_to_cpu(msg.msg_type);
+
+ /* skip if not HMI event */
+ if (type != OPAL_MSG_HMI_EVT)
+ continue;
+
+ /* HMI event info starts from param[0] */
+ hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
+ print_hmi_event_info(hmi_evt);
+ }
+
+ /*
+ * Unrecoverable HMI exception. We need to inform BMC/OCC
+ * about this error so that it can collect relevant data
+ * for error analysis before rebooting.
+ */
+ ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
+ "Unrecoverable HMI exception");
+ if (ret == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported\n",
+ OPAL_REBOOT_PLATFORM_ERROR);
+ }
+
+ /*
+ * Fall through and panic if opal_cec_reboot2() returns
+ * OPAL_UNSUPPORTED.
+ */
+ panic("Unrecoverable HMI exception");
+ }
}
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
@@ -170,7 +343,7 @@ static struct notifier_block opal_hmi_handler_nb = {
.priority = 0,
};
-static int __init opal_hmi_handler_init(void)
+int __init opal_hmi_handler_init(void)
{
int ret;
@@ -186,4 +359,3 @@ static int __init opal_hmi_handler_init(void)
}
return 0;
}
-machine_subsys_initcall(powernv, opal_hmi_handler_init);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c b/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c
new file mode 100644
index 000000000..e505223b4
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -0,0 +1,266 @@
+/*
+ * This file implements an irqchip for OPAL events. Whenever there is
+ * an interrupt that is handled by OPAL we get passed a list of events
+ * that Linux needs to do something about. These basically look like
+ * interrupts to Linux so we implement an irqchip to handle them.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+
+/* Maximum number of events supported by OPAL firmware */
+#define MAX_NUM_EVENTS 64
+
+/* State of the OPAL event irqchip: the chip callbacks, its irq domain and the enable mask. */
+struct opal_event_irqchip {
+ struct irq_chip irqchip;
+ struct irq_domain *domain; /* created in opal_event_init() */
+ unsigned long mask; /* one bit per hwirq; set on unmask, cleared on mask */
+};
+static struct opal_event_irqchip opal_event_irqchip;
+
+static unsigned int opal_irq_count; /* number of entries in opal_irqs */
+static unsigned int *opal_irqs; /* virqs requested in opal_event_init(); 0 means unused */
+
+static void opal_handle_irq_work(struct irq_work *work);
+static u64 last_outstanding_events; /* event word replayed by opal_handle_irq_work() */
+static struct irq_work opal_event_irq_work = {
+ .func = opal_handle_irq_work,
+};
+
+/*
+ * Dispatch every pending, unmasked OPAL event to its Linux virq.
+ *
+ * @events: event word in CPU byte order
+ *
+ * When called outside hard-irq context with unmasked events pending,
+ * the word is stashed and the dispatch is deferred to irq_work.
+ */
+void opal_handle_events(uint64_t events)
+{
+	u64 enabled = opal_event_irqchip.mask;
+
+	if (!in_irq() && (events & enabled)) {
+		last_outstanding_events = events;
+		irq_work_queue(&opal_event_irq_work);
+		return;
+	}
+
+	while (events & enabled) {
+		/* Walk from the highest set bit downwards. */
+		int hwirq = fls64(events) - 1;
+		u64 bit = BIT_ULL(hwirq);
+		int virq;
+
+		events &= ~bit;
+		if (!(bit & enabled))
+			continue;
+
+		virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);
+		if (virq)
+			generic_handle_irq(virq);
+	}
+}
+
+/* irq_chip mask callback: clear this event's bit in the enable mask. */
+static void opal_event_mask(struct irq_data *d)
+{
+	unsigned long *enabled = &opal_event_irqchip.mask;
+
+	clear_bit(d->hwirq, enabled);
+}
+
+/*
+ * irq_chip unmask callback: set this event's bit in the enable mask, then
+ * poll OPAL and queue irq_work if any enabled event is already pending.
+ */
+static void opal_event_unmask(struct irq_data *d)
+{
+ __be64 events;
+
+ set_bit(d->hwirq, &opal_event_irqchip.mask);
+
+ opal_poll_events(&events);
+ last_outstanding_events = be64_to_cpu(events);
+
+ /*
+ * We can't just handle the events now with opal_handle_events().
+ * If we did we would deadlock when opal_event_unmask() is called from
+ * handle_level_irq() with the irq descriptor lock held, because
+ * calling opal_handle_events() would call generic_handle_irq() and
+ * then handle_level_irq() which would try to take the descriptor lock
+ * again. Instead queue the events for later.
+ */
+ if (last_outstanding_events & opal_event_irqchip.mask)
+ /* Need to retrigger the interrupt */
+ irq_work_queue(&opal_event_irq_work);
+}
+
+/*
+ * irq_chip set_type callback.
+ *
+ * Only level-high events are supported for now: the handler keeps
+ * being called until the event has been cleared in OPAL.
+ *
+ * Returns 0 for IRQ_TYPE_LEVEL_HIGH, -EINVAL for any other flow type.
+ */
+static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	return flow_type == IRQ_TYPE_LEVEL_HIGH ? 0 : -EINVAL;
+}
+
+/* The single irqchip instance; all events start masked (.mask = 0). */
+static struct opal_event_irqchip opal_event_irqchip = {
+ .irqchip = {
+ .name = "OPAL EVT",
+ .irq_mask = opal_event_mask,
+ .irq_unmask = opal_event_unmask,
+ .irq_set_type = opal_event_set_type,
+ },
+ .mask = 0,
+};
+
+/*
+ * irq_domain map callback: attach the OPAL event chip and the level
+ * flow handler to a newly mapped virq. Always returns 0.
+ */
+static int opal_event_map(struct irq_domain *d, unsigned int irq,
+			  irq_hw_number_t hwirq)
+{
+	struct opal_event_irqchip *evt_chip = &opal_event_irqchip;
+
+	irq_set_chip_data(irq, evt_chip);
+	irq_set_chip_and_handler(irq, &evt_chip->irqchip, handle_level_irq);
+
+	return 0;
+}
+
+/*
+ * Handler for the interrupts reserved for OPAL: pass the hardware irq
+ * to firmware, then dispatch whatever events it reports back.
+ */
+static irqreturn_t opal_interrupt(int irq, void *data)
+{
+	__be64 pending;
+
+	opal_handle_interrupt(virq_to_hw(irq), &pending);
+
+	/* Firmware hands the event word back big-endian. */
+	opal_handle_events(be64_to_cpu(pending));
+
+	return IRQ_HANDLED;
+}
+
+/* irq_work callback: replay the event word stashed in last_outstanding_events. */
+static void opal_handle_irq_work(struct irq_work *work)
+{
+ opal_handle_events(last_outstanding_events);
+}
+
+/* irq_domain match callback: non-zero when @node is the domain's own DT node. */
+static int opal_event_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
+{
+	struct device_node *domain_node = irq_domain_get_of_node(h);
+
+	return domain_node == node;
+}
+
+/*
+ * irq_domain xlate callback: translate a one-cell interrupt specifier.
+ *
+ * @intspec:   specifier cells; cell 0 is the OPAL event number
+ * @intsize:   number of cells in @intspec
+ * @out_hwirq: receives the event number
+ * @out_flags: always set to IRQ_TYPE_LEVEL_HIGH
+ *
+ * Returns 0 on success, -EINVAL if the specifier has no cells.
+ */
+static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	/* Never read intspec[0] from an empty specifier. */
+	if (WARN_ON(intsize < 1))
+		return -EINVAL;
+
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+/* Callbacks handed to irq_domain_add_linear() in opal_event_init(). */
+static const struct irq_domain_ops opal_event_domain_ops = {
+ .match = opal_event_match,
+ .map = opal_event_map,
+ .xlate = opal_event_xlate,
+};
+
+/*
+ * Release every interrupt requested by opal_event_init().
+ *
+ * Safe to call when the irq cache was never allocated.
+ */
+void opal_event_shutdown(void)
+{
+	unsigned int i;
+
+	/* Nothing was cached, e.g. init failed before the allocation. */
+	if (!opal_irqs)
+		return;
+
+	/* First free interrupts, which will also mask them */
+	for (i = 0; i < opal_irq_count; i++) {
+		if (opal_irqs[i])
+			free_irq(opal_irqs[i], NULL);
+		opal_irqs[i] = 0;
+	}
+}
+
+/*
+ * Create the OPAL event irq domain and install a handler on every
+ * interrupt listed in the "opal-interrupts" property of /ibm,opal.
+ *
+ * Returns 0 on success, -ENODEV if /ibm,opal is missing, -ENOMEM if
+ * the domain or the irq cache cannot be allocated. An interrupt that
+ * fails to map or to be requested is skipped with a warning; if the
+ * last request_irq() attempt failed, its error code is returned.
+ */
+int __init opal_event_init(void)
+{
+	struct device_node *dn, *opal_node;
+	const __be32 *irqs;
+	unsigned int i;
+	int irqlen, rc = 0;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_warn("opal: Node not found\n");
+		return -ENODEV;
+	}
+
+	/* If dn is NULL it means the domain won't be linked to a DT
+	 * node so therefore irq_of_parse_and_map(...) won't work. But
+	 * that shouldn't be a problem because if we're running a
+	 * version of skiboot that doesn't have the dn then the
+	 * devices won't have the correct properties and will have to
+	 * fall back to the legacy method (opal_event_request(...))
+	 * anyway. */
+	dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
+	opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
+			&opal_event_domain_ops, &opal_event_irqchip);
+	of_node_put(dn);
+	if (!opal_event_irqchip.domain) {
+		pr_warn("opal: Unable to create irq domain\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Get interrupt property */
+	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
+	opal_irq_count = irqs ? (irqlen / 4) : 0;
+	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
+
+	/*
+	 * Cache of requested virqs, used by opal_event_shutdown(). Bail
+	 * out if it cannot be allocated instead of writing through a
+	 * NULL pointer in the loop below.
+	 */
+	opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL);
+	if (!opal_irqs) {
+		pr_warn("opal: Unable to allocate irq cache\n");
+		opal_irq_count = 0;
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Install interrupt handlers */
+	for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
+		unsigned int irq, virq;
+
+		/* Get hardware and virtual IRQ */
+		irq = be32_to_cpup(irqs);
+		virq = irq_create_mapping(NULL, irq);
+		if (virq == NO_IRQ) {
+			pr_warn("Failed to map irq 0x%x\n", irq);
+			continue;
+		}
+
+		/* Install interrupt handler */
+		rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+		if (rc) {
+			irq_dispose_mapping(virq);
+			pr_warn("Error %d requesting irq %d (0x%x)\n",
+				rc, virq, irq);
+			continue;
+		}
+
+		/* Cache IRQ */
+		opal_irqs[i] = virq;
+	}
+
+out:
+	of_node_put(opal_node);
+	return rc;
+}
+machine_arch_initcall(powernv, opal_event_init);
+
+/**
+ * opal_event_request(unsigned int opal_event_nr) - Request an event
+ * @opal_event_nr: the opal event number to request
+ *
+ * Look up (creating it if necessary) the linux virq for an opal event
+ * so that it can be handed to request_irq(). Intended only for legacy
+ * devices without proper device tree bindings; everything else should
+ * use irq_of_parse_and_map().
+ *
+ * Warns once and returns NO_IRQ when the event domain does not exist.
+ */
+int opal_event_request(unsigned int opal_event_nr)
+{
+	int no_domain = !opal_event_irqchip.domain;
+
+	if (WARN_ON_ONCE(no_domain))
+		return NO_IRQ;
+
+	return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr);
+}
+EXPORT_SYMBOL(opal_event_request);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c b/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 000000000..6f1214d4d
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,75 @@
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware. The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output. In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed. This function does not actually dump the
+ * message, it just ensures that OPAL completely flushes the console buffer.
+ *
+ * @dumper: the registered dumper (unused)
+ * @reason: why the dump was triggered; only KMSG_DUMP_PANIC is acted on
+ */
+static void force_opal_console_flush(struct kmsg_dumper *dumper,
+				     enum kmsg_dump_reason reason)
+{
+	int i;
+	int64_t ret;
+
+	/*
+	 * Outside of a panic context the pollers will continue to run,
+	 * so we don't need to do any special flushing.
+	 */
+	if (reason != KMSG_DUMP_PANIC)
+		return;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		/*
+		 * Incrementally flush until there's nothing left. Retry
+		 * only while firmware reports a transient state; any
+		 * other status (success, unsupported, bad parameter or
+		 * a hard error) ends the loop, so a persistent error
+		 * cannot hang the panic path.
+		 */
+		do {
+			ret = opal_console_flush(0);
+		} while (ret == OPAL_BUSY || ret == OPAL_BUSY_EVENT ||
+			 ret == OPAL_PARTIAL);
+	} else {
+		/*
+		 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+		 * the console can still be flushed by calling the polling
+		 * function enough times to flush the buffer. We don't know
+		 * how much output still needs to be flushed, but we can be
+		 * generous since the kernel is in panic and doesn't need
+		 * to do much else.
+		 */
+		printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
+		for (i = 0; i < 1024; i++) {
+			opal_poll_events(NULL);
+		}
+	}
+}
+
+/* Dumper registered by opal_kmsg_init(); its only job is to flush the OPAL console. */
+static struct kmsg_dumper opal_kmsg_dumper = {
+ .dump = force_opal_console_flush
+};
+
+/* Register the console-flushing kmsg dumper; a failure is only logged. */
+void __init opal_kmsg_init(void)
+{
+	int err = kmsg_dump_register(&opal_kmsg_dumper);
+
+	if (!err)
+		return;
+
+	pr_err("opal: kmsg_dump_register failed; returned %d\n", err);
+}
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c b/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 43db2136d..00a29432b 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -144,4 +144,4 @@ static int __init opal_mem_err_init(void)
}
return 0;
}
-machine_subsys_initcall(powernv, opal_mem_err_init);
+machine_device_initcall(powernv, opal_mem_err_init);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-power.c b/kernel/arch/powerpc/platforms/powernv/opal-power.c
index ac46c2c24..58dc33082 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-power.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-power.c
@@ -9,9 +9,12 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "opal-power: " fmt
+
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
+#include <linux/of.h>
#include <asm/opal.h>
#include <asm/machdep.h>
@@ -19,30 +22,116 @@
#define SOFT_OFF 0x00
#define SOFT_REBOOT 0x01
+/*
+ * Detect EPOW event.
+ *
+ * Queries OPAL for the EPOW status of every supported class.
+ *
+ * Returns true if any class reports an event that requires shutdown,
+ * false if none does or if the status cannot be read.
+ */
+static bool detect_epow(void)
+{
+	u16 epow, nr_classes;
+	int i, rc;
+	__be16 epow_classes;
+	__be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0};
+
+	/*
+	 * Check for EPOW event. Kernel sends supported EPOW classes info
+	 * to OPAL. OPAL returns EPOW info along with classes present.
+	 */
+	epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX);
+	rc = opal_get_epow_status(opal_epow_status, &epow_classes);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("Failed to get EPOW event information\n");
+		return false;
+	}
+
+	/*
+	 * The class count is written back by firmware; never trust it
+	 * to stay within the status array.
+	 */
+	nr_classes = be16_to_cpu(epow_classes);
+	if (nr_classes > OPAL_SYSEPOW_MAX)
+		nr_classes = OPAL_SYSEPOW_MAX;
+
+	/* Look for EPOW events present */
+	for (i = 0; i < nr_classes; i++) {
+		epow = be16_to_cpu(opal_epow_status[i]);
+
+		/* Filter events which do not need shutdown. */
+		if (i == OPAL_SYSEPOW_POWER)
+			epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL |
+				  OPAL_SYSPOWER_INCL);
+		if (epow)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check for existing EPOW, DPO events.
+ *
+ * Returns true when OPAL already reports a DPO event, or when
+ * detect_epow() finds an EPOW event that needs a shutdown.
+ */
+static bool poweroff_pending(void)
+{
+	__be64 dpo_timeout;
+	bool pending = false;
+
+	/* DPO is queried first; EPOW is only checked if there is none. */
+	if (opal_get_dpo_status(&dpo_timeout) == OPAL_SUCCESS) {
+		pr_info("Existing DPO event detected.\n");
+		pending = true;
+	} else if (detect_epow()) {
+		pr_info("Existing EPOW event detected.\n");
+		pending = true;
+	}
+
+	return pending;
+}
+
+/* OPAL power-control events notifier */
static int opal_power_control_event(struct notifier_block *nb,
- unsigned long msg_type, void *msg)
+ unsigned long msg_type, void *msg)
{
- struct opal_msg *power_msg = msg;
uint64_t type;
- type = be64_to_cpu(power_msg->params[0]);
-
- switch (type) {
- case SOFT_REBOOT:
- pr_info("OPAL: reboot requested\n");
- orderly_reboot();
+ switch (msg_type) {
+ case OPAL_MSG_EPOW:
+ if (detect_epow()) {
+ pr_info("EPOW msg received. Powering off system\n");
+ orderly_poweroff(true);
+ }
break;
- case SOFT_OFF:
- pr_info("OPAL: poweroff requested\n");
+ case OPAL_MSG_DPO:
+ pr_info("DPO msg received. Powering off system\n");
orderly_poweroff(true);
break;
+ case OPAL_MSG_SHUTDOWN:
+ type = be64_to_cpu(((struct opal_msg *)msg)->params[0]);
+ switch (type) {
+ case SOFT_REBOOT:
+ pr_info("Reboot requested\n");
+ orderly_reboot();
+ break;
+ case SOFT_OFF:
+ pr_info("Poweroff requested\n");
+ orderly_poweroff(true);
+ break;
+ default:
+ pr_err("Unknown power-control type %llu\n", type);
+ }
+ break;
default:
- pr_err("OPAL: power control type unexpected %016llx\n", type);
+ pr_err("Unknown OPAL message type %lu\n", msg_type);
}
return 0;
}
+/* OPAL EPOW event notifier block; handled by opal_power_control_event() */
+static struct notifier_block opal_epow_nb = {
+ .notifier_call = opal_power_control_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/* OPAL DPO event notifier block; handled by opal_power_control_event() */
+static struct notifier_block opal_dpo_nb = {
+ .notifier_call = opal_power_control_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/* OPAL power-control event notifier block */
static struct notifier_block opal_power_control_nb = {
.notifier_call = opal_power_control_event,
.next = NULL,
@@ -51,16 +140,40 @@ static struct notifier_block opal_power_control_nb = {
static int __init opal_power_control_init(void)
{
- int ret;
+ int ret, supported = 0;
+ struct device_node *np;
+ /* Register OPAL power-control events notifier */
ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
- &opal_power_control_nb);
- if (ret) {
- pr_err("%s: Can't register OPAL event notifier (%d)\n",
- __func__, ret);
- return ret;
+ &opal_power_control_nb);
+ if (ret)
+ pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret);
+
+ /* Determine OPAL EPOW, DPO support */
+ np = of_find_node_by_path("/ibm,opal/epow");
+ if (np) {
+ supported = of_device_is_compatible(np, "ibm,opal-v3-epow");
+ of_node_put(np);
}
+ if (!supported)
+ return 0;
+ pr_info("OPAL EPOW, DPO support detected.\n");
+
+ /* Register EPOW event notifier */
+ ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb);
+ if (ret)
+ pr_err("Failed to register EPOW notifier, ret = %d\n", ret);
+
+ /* Register DPO event notifier */
+ ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb);
+ if (ret)
+ pr_err("Failed to register DPO notifier, ret = %d\n", ret);
+
+ /* Check for any pending EPOW or DPO events. */
+ if (poweroff_pending())
+ orderly_poweroff(true);
+
return 0;
}
machine_subsys_initcall(powernv, opal_power_control_init);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-prd.c b/kernel/arch/powerpc/platforms/powernv/opal-prd.c
new file mode 100644
index 000000000..4ece8e40d
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/powernv/opal-prd.c
@@ -0,0 +1,448 @@
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * Copyright IBM Corporation 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "opal-prd: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/poll.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/opal-prd.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+
+/**
+ * The msg member must be at the end of the struct, as it's followed by the
+ * message data.
+ *
+ * Items are allocated in opal_prd_msg_notifier() with room for the whole
+ * message, linked on opal_prd_msg_queue, and freed by opal_prd_read().
+ */
+struct opal_prd_msg_queue_item {
+ struct list_head list;
+ struct opal_prd_msg_header msg;
+};
+
+static struct device_node *prd_node; /* OF node of the one probed PRD device */
+static LIST_HEAD(opal_prd_msg_queue); /* messages waiting for opal_prd_read() */
+static DEFINE_SPINLOCK(opal_prd_msg_queue_lock); /* protects opal_prd_msg_queue */
+static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait); /* woken when a message is queued */
+static atomic_t prd_usage; /* 1 while the device is open, else 0 */
+
+/*
+ * Check that [addr, addr + size) lies entirely inside one of the
+ * /reserved-memory child ranges that carry an "ibm,prd-label" property.
+ *
+ * Returns true if such a range is found; false if there is none, if
+ * addr + size wraps around, or if /reserved-memory does not exist.
+ */
+static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
+{
+	struct device_node *parent, *node;
+	bool found;
+
+	if (addr + size < addr)
+		return false;
+
+	parent = of_find_node_by_path("/reserved-memory");
+	if (!parent)
+		return false;
+
+	found = false;
+
+	for_each_child_of_node(parent, node) {
+		uint64_t range_addr, range_size, range_end;
+		const __be32 *addrp;
+		const char *label;
+
+		/* PRD ranges need a label */
+		label = of_get_property(node, "ibm,prd-label", NULL);
+		if (!label)
+			continue;
+
+		/* Skip nodes without a usable address instead of reading NULL. */
+		addrp = of_get_address(node, 0, &range_size, NULL);
+		if (!addrp)
+			continue;
+
+		range_addr = of_read_number(addrp, 2);
+		range_end = range_addr + range_size;
+
+		/* Ignore empty ranges and ranges that wrap around. */
+		if (range_end <= range_addr)
+			continue;
+
+		if (addr >= range_addr && addr + size <= range_end) {
+			found = true;
+			/* Leaving the iterator early: drop its reference. */
+			of_node_put(node);
+			break;
+		}
+	}
+
+	of_node_put(parent);
+	return found;
+}
+
+/*
+ * Open handler. Only one opener at a time is allowed, so that separate
+ * processes cannot interact with the FW PRD channel concurrently.
+ *
+ * Returns 0 on success, -EBUSY if the device is already open.
+ */
+static int opal_prd_open(struct inode *inode, struct file *file)
+{
+	int was_open = atomic_xchg(&prd_usage, 1) == 1;
+
+	return was_open ? -EBUSY : 0;
+}
+
+/*
+ * opal_prd_mmap - maps firmware-provided ranges into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * The page offset of the mapping is taken as the physical page to map.
+ * Returns -EINVAL if the request is not inside an allowed PRD range,
+ * otherwise the result of remap_pfn_range().
+ */
+
+static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t phys_start = vma->vm_pgoff << PAGE_SHIFT;
+	size_t length = vma->vm_end - vma->vm_start;
+	pgprot_t prot;
+
+	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
+		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
+		 vma->vm_flags);
+
+	/* ensure we're mapping within one of the allowable ranges */
+	if (!opal_prd_range_is_valid(phys_start, length))
+		return -EINVAL;
+
+	prot = phys_mem_access_prot(file, vma->vm_pgoff, length,
+				    vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, length,
+			       prot);
+}
+
+/* Return true if no PRD message is queued; takes the queue lock. */
+static bool opal_msg_queue_empty(void)
+{
+	unsigned long irq_state;
+	bool empty;
+
+	spin_lock_irqsave(&opal_prd_msg_queue_lock, irq_state);
+	empty = list_empty(&opal_prd_msg_queue);
+	spin_unlock_irqrestore(&opal_prd_msg_queue_lock, irq_state);
+
+	return empty;
+}
+
+/* Poll handler: reports the device readable while a message is queued. */
+static unsigned int opal_prd_poll(struct file *file,
+				  struct poll_table_struct *wait)
+{
+	poll_wait(file, &opal_prd_msg_wait, wait);
+
+	return opal_msg_queue_empty() ? 0 : POLLIN | POLLRDNORM;
+}
+
+/*
+ * Read handler: hand the oldest queued PRD message to userspace.
+ *
+ * Blocks until a message is available unless the file is O_NONBLOCK.
+ * Returns the message size on success, -EINVAL if @count is smaller
+ * than a header or than the message, -ESPIPE for a non-zero offset,
+ * -EAGAIN if non-blocking and the queue is empty, -EINTR if the wait
+ * was interrupted, or -EFAULT if the copy to userspace fails. On
+ * -EINVAL (message too large) and -EFAULT the message is put back at
+ * the head of the queue.
+ */
+static ssize_t opal_prd_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct opal_prd_msg_queue_item *item;
+ unsigned long flags;
+ ssize_t size, err;
+ int rc;
+
+ /* we need at least a header's worth of data */
+ if (count < sizeof(item->msg))
+ return -EINVAL;
+
+ if (*ppos)
+ return -ESPIPE;
+
+ item = NULL;
+
+ for (;;) {
+
+ /* dequeue the first item, if any, under the queue lock */
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ if (!list_empty(&opal_prd_msg_queue)) {
+ item = list_first_entry(&opal_prd_msg_queue,
+ struct opal_prd_msg_queue_item, list);
+ list_del(&item->list);
+ }
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ if (item)
+ break;
+
+ if (file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ rc = wait_event_interruptible(opal_prd_msg_wait,
+ !opal_msg_queue_empty());
+ if (rc)
+ return -EINTR;
+ }
+
+ /* the size field is big-endian */
+ size = be16_to_cpu(item->msg.size);
+ if (size > count) {
+ err = -EINVAL;
+ goto err_requeue;
+ }
+
+ rc = copy_to_user(buf, &item->msg, size);
+ if (rc) {
+ err = -EFAULT;
+ goto err_requeue;
+ }
+
+ kfree(item);
+
+ return size;
+
+err_requeue:
+ /* eep! re-queue at the head of the list */
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ list_add(&item->list, &opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+ return err;
+}
+
+/*
+ * Write handler: pass one PRD message from userspace to firmware.
+ *
+ * @buf holds a message that starts with a struct opal_prd_msg_header
+ * whose big-endian size field covers the whole message, header included.
+ *
+ * Returns the message size on success, -EINVAL if @count or the size
+ * field is inconsistent, -EFAULT on a failed user copy, -ENOMEM on
+ * allocation failure, or -EIO if firmware rejects the message.
+ */
+static ssize_t opal_prd_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct opal_prd_msg_header hdr;
+	size_t msg_size;
+	ssize_t size;
+	void *msg;
+	int rc;
+
+	if (count < sizeof(hdr))
+		return -EINVAL;
+
+	/* grab the header */
+	rc = copy_from_user(&hdr, buf, sizeof(hdr));
+	if (rc)
+		return -EFAULT;
+
+	/*
+	 * The message must at least contain its own header and must not
+	 * extend past what the caller actually handed to write().
+	 */
+	msg_size = be16_to_cpu(hdr.size);
+	if (msg_size < sizeof(hdr) || msg_size > count)
+		return -EINVAL;
+	size = msg_size;
+
+	msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = copy_from_user(msg, buf, msg_size);
+	if (rc) {
+		size = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Userspace may have changed the header between the two copies;
+	 * keep the one that was validated so that the size seen by
+	 * firmware always matches the allocation.
+	 */
+	memcpy(msg, &hdr, sizeof(hdr));
+
+	rc = opal_prd_msg(msg);
+	if (rc) {
+		pr_warn("write: opal_prd_msg returned %d\n", rc);
+		size = -EIO;
+	}
+
+out_free:
+	kfree(msg);
+
+	return size;
+}
+
+/*
+ * Release handler: send a FINI message to firmware and mark the device
+ * as free again. Always returns 0; the firmware result is ignored.
+ */
+static int opal_prd_release(struct inode *inode, struct file *file)
+{
+	/*
+	 * Designated initialiser so that any header member not set
+	 * explicitly goes to firmware as zero rather than stack garbage.
+	 */
+	struct opal_prd_msg_header msg = {
+		.size = cpu_to_be16(sizeof(msg)),
+		.type = OPAL_PRD_MSG_TYPE_FINI,
+	};
+
+	opal_prd_msg((struct opal_prd_msg *)&msg);
+
+	atomic_xchg(&prd_usage, 0);
+
+	return 0;
+}
+
+/*
+ * ioctl handler.
+ *
+ * OPAL_PRD_GET_INFO copies a struct opal_prd_info with the kernel
+ * interface version to @param. OPAL_PRD_SCOM_READ and OPAL_PRD_SCOM_WRITE
+ * copy a struct opal_prd_scom in, perform the XSCOM access, and copy the
+ * struct back with the firmware status in its rc field.
+ *
+ * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL for an
+ * unknown command. A firmware error is reported through scom.rc only.
+ */
+static long opal_prd_ioctl(struct file *file, unsigned int cmd,
+ unsigned long param)
+{
+ struct opal_prd_info info;
+ struct opal_prd_scom scom;
+ int rc = 0;
+
+ switch (cmd) {
+ case OPAL_PRD_GET_INFO:
+ memset(&info, 0, sizeof(info));
+ info.version = OPAL_PRD_KERNEL_VERSION;
+ rc = copy_to_user((void __user *)param, &info, sizeof(info));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ case OPAL_PRD_SCOM_READ:
+ rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+
+ /* firmware stores the value big-endian; convert it in place */
+ scom.rc = opal_xscom_read(scom.chip, scom.addr,
+ (__be64 *)&scom.data);
+ scom.data = be64_to_cpu(scom.data);
+ pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n",
+ scom.chip, scom.addr, scom.data, scom.rc);
+
+ rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ case OPAL_PRD_SCOM_WRITE:
+ rc = copy_from_user(&scom, (void __user *)param, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+
+ scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
+ pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n",
+ scom.chip, scom.addr, scom.data, scom.rc);
+
+ rc = copy_to_user((void __user *)param, &scom, sizeof(scom));
+ if (rc)
+ return -EFAULT;
+ break;
+
+ default:
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+/* File operations of the opal-prd character device. */
+static const struct file_operations opal_prd_fops = {
+ .open = opal_prd_open,
+ .mmap = opal_prd_mmap,
+ .poll = opal_prd_poll,
+ .read = opal_prd_read,
+ .write = opal_prd_write,
+ .unlocked_ioctl = opal_prd_ioctl,
+ .release = opal_prd_release,
+ .owner = THIS_MODULE,
+};
+
+/* Misc device registered in opal_prd_probe(); the minor number is dynamic. */
+static struct miscdevice opal_prd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "opal-prd",
+ .fops = &opal_prd_fops,
+};
+
+/*
+ * opal interface
+ *
+ * Notifier for OPAL messages: copies an OPAL_MSG_PRD message into a newly
+ * allocated queue item, appends it to opal_prd_msg_queue and wakes any
+ * reader. Other message types are ignored. Returns 0, or -ENOMEM if the
+ * item cannot be allocated.
+ *
+ * NOTE(review): the size taken from the message header is not checked
+ * against the size of msg->params or of the header itself - confirm that
+ * firmware guarantees both.
+ */
+static int opal_prd_msg_notifier(struct notifier_block *nb,
+ unsigned long msg_type, void *_msg)
+{
+ struct opal_prd_msg_queue_item *item;
+ struct opal_prd_msg_header *hdr;
+ struct opal_msg *msg = _msg;
+ int msg_size, item_size;
+ unsigned long flags;
+
+ if (msg_type != OPAL_MSG_PRD)
+ return 0;
+
+ /* Calculate total size of the message and item we need to store. The
+ * 'size' field in the header includes the header itself. */
+ hdr = (void *)msg->params;
+ msg_size = be16_to_cpu(hdr->size);
+ item_size = msg_size + sizeof(*item) - sizeof(item->msg);
+
+ /* GFP_ATOMIC: allocate without sleeping */
+ item = kzalloc(item_size, GFP_ATOMIC);
+ if (!item)
+ return -ENOMEM;
+
+ memcpy(&item->msg, msg->params, msg_size);
+
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ list_add_tail(&item->list, &opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ wake_up_interruptible(&opal_prd_msg_wait);
+
+ return 0;
+}
+
+/* Notifier block registered for OPAL_MSG_PRD in opal_prd_probe(). */
+static struct notifier_block opal_prd_event_nb = {
+ .notifier_call = opal_prd_msg_notifier,
+ .next = NULL,
+ .priority = 0,
+};
+
+/*
+ * Platform probe: register the OPAL_MSG_PRD notifier and the misc device.
+ *
+ * Returns 0 on success, -ENODEV without an OF node, -EBUSY if a PRD
+ * instance is already bound, or the error from the failing registration.
+ */
+static int opal_prd_probe(struct platform_device *pdev)
+{
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	/* We should only have one prd driver instance per machine; ensure
+	 * that we only get a valid probe on a single OF node.
+	 */
+	if (prd_node)
+		return -EBUSY;
+
+	prd_node = pdev->dev.of_node;
+
+	rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb);
+	if (rc) {
+		pr_err("Couldn't register event notifier\n");
+		goto err_unclaim;
+	}
+
+	rc = misc_register(&opal_prd_dev);
+	if (rc) {
+		pr_err("failed to register miscdev\n");
+		opal_message_notifier_unregister(OPAL_MSG_PRD,
+						 &opal_prd_event_nb);
+		goto err_unclaim;
+	}
+
+	return 0;
+
+err_unclaim:
+	/* Release the single-instance claim so a later probe can succeed. */
+	prd_node = NULL;
+	return rc;
+}
+
+/*
+ * Platform remove: undo opal_prd_probe(). Always returns 0.
+ */
+static int opal_prd_remove(struct platform_device *pdev)
+{
+	misc_deregister(&opal_prd_dev);
+	opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+
+	/* Drop the single-instance claim so the device can be bound again. */
+	prd_node = NULL;
+
+	return 0;
+}
+
+/* Device tree match table: binds to nodes compatible with "ibm,opal-prd". */
+static const struct of_device_id opal_prd_match[] = {
+ { .compatible = "ibm,opal-prd" },
+ { },
+};
+
+/* Platform driver glue for the PRD device. */
+static struct platform_driver opal_prd_driver = {
+ .driver = {
+ .name = "opal-prd",
+ .owner = THIS_MODULE,
+ .of_match_table = opal_prd_match,
+ },
+ .probe = opal_prd_probe,
+ .remove = opal_prd_remove,
+};
+
+module_platform_driver(opal_prd_driver);
+
+MODULE_DEVICE_TABLE(of, opal_prd_match);
+MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver");
+MODULE_LICENSE("GPL");
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-sensor.c b/kernel/arch/powerpc/platforms/powernv/opal-sensor.c
index 655250499..a06059df9 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -77,7 +77,7 @@ out:
}
EXPORT_SYMBOL_GPL(opal_get_sensor_data);
-static __init int opal_sensor_init(void)
+int __init opal_sensor_init(void)
{
struct platform_device *pdev;
struct device_node *sensor;
@@ -93,4 +93,3 @@ static __init int opal_sensor_init(void)
return PTR_ERR_OR_ZERO(pdev);
}
-machine_subsys_initcall(powernv, opal_sensor_init);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c b/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c
index 9d1acf22a..afe66c576 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
}
ret = opal_get_param(token, param_id, (u64)buffer, length);
- if (ret != OPAL_ASYNC_COMPLETION)
+ if (ret != OPAL_ASYNC_COMPLETION) {
+ ret = opal_error_code(ret);
goto out_token;
+ }
ret = opal_async_wait_response(token, &msg);
if (ret) {
@@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
goto out_token;
}
- ret = be64_to_cpu(msg.params[1]);
+ ret = opal_error_code(be64_to_cpu(msg.params[1]));
out_token:
opal_async_release_token(token);
@@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
ret = opal_set_param(token, param_id, (u64)buffer, length);
- if (ret != OPAL_ASYNC_COMPLETION)
+ if (ret != OPAL_ASYNC_COMPLETION) {
+ ret = opal_error_code(ret);
goto out_token;
+ }
ret = opal_async_wait_response(token, &msg);
if (ret) {
@@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
goto out_token;
}
- ret = be64_to_cpu(msg.params[1]);
+ ret = opal_error_code(be64_to_cpu(msg.params[1]));
out_token:
opal_async_release_token(token);
@@ -162,10 +166,20 @@ void __init opal_sys_param_init(void)
goto out;
}
+ /* Some systems do not use sysparams; this is not an error */
+ sysparam = of_find_node_by_path("/ibm,opal/sysparams");
+ if (!sysparam)
+ goto out;
+
+ if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
+ pr_err("SYSPARAM: Opal sysparam node not compatible\n");
+ goto out_node_put;
+ }
+
sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
if (!sysparam_kobj) {
pr_err("SYSPARAM: Failed to create sysparam kobject\n");
- goto out;
+ goto out_node_put;
}
/* Allocate big enough buffer for any get/set transactions */
@@ -176,30 +190,19 @@ void __init opal_sys_param_init(void)
goto out_kobj_put;
}
- sysparam = of_find_node_by_path("/ibm,opal/sysparams");
- if (!sysparam) {
- pr_err("SYSPARAM: Opal sysparam node not found\n");
- goto out_param_buf;
- }
-
- if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
- pr_err("SYSPARAM: Opal sysparam node not compatible\n");
- goto out_node_put;
- }
-
/* Number of parameters exposed through DT */
count = of_property_count_strings(sysparam, "param-name");
if (count < 0) {
pr_err("SYSPARAM: No string found of property param-name in "
"the node %s\n", sysparam->name);
- goto out_node_put;
+ goto out_param_buf;
}
id = kzalloc(sizeof(*id) * count, GFP_KERNEL);
if (!id) {
pr_err("SYSPARAM: Failed to allocate memory to read parameter "
"id\n");
- goto out_node_put;
+ goto out_param_buf;
}
size = kzalloc(sizeof(*size) * count, GFP_KERNEL);
@@ -293,12 +296,12 @@ out_free_size:
kfree(size);
out_free_id:
kfree(id);
-out_node_put:
- of_node_put(sysparam);
out_param_buf:
kfree(param_data_buf);
out_kobj_put:
kobject_put(sysparam_kobj);
+out_node_put:
+ of_node_put(sysparam);
out:
return;
}
diff --git a/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S b/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S
index a7ade94cd..e45b88a5d 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -202,6 +202,7 @@ OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
@@ -249,6 +250,7 @@ OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
@@ -283,6 +285,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
@@ -295,3 +298,7 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
+OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
+OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
diff --git a/kernel/arch/powerpc/platforms/powernv/opal.c b/kernel/arch/powerpc/platforms/powernv/opal.c
index 2241565b0..ae29eaf85 100644
--- a/kernel/arch/powerpc/platforms/powernv/opal.c
+++ b/kernel/arch/powerpc/platforms/powernv/opal.c
@@ -53,13 +53,7 @@ static int mc_recoverable_range_len;
struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
-static unsigned int *opal_irqs;
-static unsigned int opal_irq_count;
-static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
-static DEFINE_SPINLOCK(opal_notifier_lock);
-static uint64_t last_notified_mask = 0x0ul;
-static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
static uint32_t opal_heartbeat;
static void opal_reinit_cores(void)
@@ -225,82 +219,6 @@ static int __init opal_register_exception_handlers(void)
}
machine_early_initcall(powernv, opal_register_exception_handlers);
-int opal_notifier_register(struct notifier_block *nb)
-{
- if (!nb) {
- pr_warning("%s: Invalid argument (%p)\n",
- __func__, nb);
- return -EINVAL;
- }
-
- atomic_notifier_chain_register(&opal_notifier_head, nb);
- return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_register);
-
-int opal_notifier_unregister(struct notifier_block *nb)
-{
- if (!nb) {
- pr_warning("%s: Invalid argument (%p)\n",
- __func__, nb);
- return -EINVAL;
- }
-
- atomic_notifier_chain_unregister(&opal_notifier_head, nb);
- return 0;
-}
-EXPORT_SYMBOL_GPL(opal_notifier_unregister);
-
-static void opal_do_notifier(uint64_t events)
-{
- unsigned long flags;
- uint64_t changed_mask;
-
- if (atomic_read(&opal_notifier_hold))
- return;
-
- spin_lock_irqsave(&opal_notifier_lock, flags);
- changed_mask = last_notified_mask ^ events;
- last_notified_mask = events;
- spin_unlock_irqrestore(&opal_notifier_lock, flags);
-
- /*
- * We feed with the event bits and changed bits for
- * enough information to the callback.
- */
- atomic_notifier_call_chain(&opal_notifier_head,
- events, (void *)changed_mask);
-}
-
-void opal_notifier_update_evt(uint64_t evt_mask,
- uint64_t evt_val)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&opal_notifier_lock, flags);
- last_notified_mask &= ~evt_mask;
- last_notified_mask |= evt_val;
- spin_unlock_irqrestore(&opal_notifier_lock, flags);
-}
-
-void opal_notifier_enable(void)
-{
- int64_t rc;
- __be64 evt = 0;
-
- atomic_set(&opal_notifier_hold, 0);
-
- /* Process pending events */
- rc = opal_poll_events(&evt);
- if (rc == OPAL_SUCCESS && evt)
- opal_do_notifier(be64_to_cpu(evt));
-}
-
-void opal_notifier_disable(void)
-{
- atomic_set(&opal_notifier_hold, 1);
-}
-
/*
* Opal message notifier based on message type. Allow subscribers to get
* notified for specific messgae type.
@@ -317,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type,
return atomic_notifier_chain_register(
&opal_msg_notifier_head[msg_type], nb);
}
+EXPORT_SYMBOL_GPL(opal_message_notifier_register);
int opal_message_notifier_unregister(enum opal_msg_type msg_type,
struct notifier_block *nb)
@@ -324,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type,
return atomic_notifier_chain_unregister(
&opal_msg_notifier_head[msg_type], nb);
}
+EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
@@ -358,42 +278,42 @@ static void opal_handle_message(void)
/* Sanity check */
if (type >= OPAL_MSG_TYPE_MAX) {
- pr_warning("%s: Unknown message type: %u\n", __func__, type);
+ pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
return;
}
opal_message_do_notify(type, (void *)&msg);
}
-static int opal_message_notify(struct notifier_block *nb,
- unsigned long events, void *change)
+/*
+ * Interrupt handler installed by opal_message_init() for the OPAL
+ * "message pending" event: hands off to opal_handle_message() and reports
+ * the interrupt as handled.  Neither @irq nor @data is used.
+ */
+static irqreturn_t opal_message_notify(int irq, void *data)
{
- if (events & OPAL_EVENT_MSG_PENDING)
- opal_handle_message();
- return 0;
+ opal_handle_message();
+ return IRQ_HANDLED;
}
-static struct notifier_block opal_message_nb = {
- .notifier_call = opal_message_notify,
- .next = NULL,
- .priority = 0,
-};
-
+/*
+ * Initialise the per-type OPAL message notifier chains and attach
+ * opal_message_notify() to the interrupt of the OPAL_EVENT_MSG_PENDING
+ * event.
+ *
+ * Returns 0 on success, -ENODEV if no interrupt could be obtained for the
+ * event, or the error code returned by request_irq().
+ */
static int __init opal_message_init(void)
{
- int ret, i;
+ int ret, i, irq;
for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
- ret = opal_notifier_register(&opal_message_nb);
+ irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
+ if (!irq) {
+ pr_err("%s: Can't register OPAL event irq (%d)\n",
+ __func__, irq);
+ /* irq is 0 here, so returning it would signal success */
+ return -ENODEV;
+ }
+
+ ret = request_irq(irq, opal_message_notify,
+ IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
if (ret) {
- pr_err("%s: Can't register OPAL event notifier (%d)\n",
+ pr_err("%s: Can't request OPAL event irq (%d)\n",
__func__, ret);
return ret;
}
+
return 0;
}
-machine_early_initcall(powernv, opal_message_init);
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
@@ -521,6 +441,7 @@ static int opal_recover_mce(struct pt_regs *regs,
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
+ int ret;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return 0;
@@ -535,6 +456,43 @@ int opal_machine_check(struct pt_regs *regs)
if (opal_recover_mce(regs, &evt))
return 1;
+
+ /*
+ * Unrecovered machine check, we are heading to panic path.
+ *
+ * We may have hit this MCE in very early stage of kernel
+ * initialization even before opal-prd has started running. If
+ * this is the case then this MCE error may go un-noticed or
+ * un-analyzed if we go down panic path. We need to inform
+ * BMC/OCC about this error so that they can collect relevant
+ * data for error analysis before rebooting.
+ * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
+ * This function may not return on BMC based system.
+ */
+ ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
+ "Unrecoverable Machine Check exception");
+ if (ret == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported\n",
+ OPAL_REBOOT_PLATFORM_ERROR);
+ }
+
+ /*
+ * We reached here. There can be three possibilities:
+ * 1. We are running on a firmware level that does not support
+ * opal_cec_reboot2()
+ * 2. We are running on a firmware level that does not support
+ * OPAL_REBOOT_PLATFORM_ERROR reboot type.
+ * 3. We are running on FSP based system that does not need opal
+ * to trigger checkstop explicitly for error analysis. The FSP
+ * PRD component would have already got notified about this
+ * error through other channels.
+ *
+ * If hardware marked this as an unrecoverable MCE, we are
+ * going to panic anyway. Even if it didn't, it's not safe to
+ * continue at this point, so we should explicitly panic.
+ */
+
+ panic("PowerNV Unrecovered Machine Check");
return 0;
}
@@ -573,7 +531,7 @@ int opal_handle_hmi_exception(struct pt_regs *regs)
local_paca->hmi_event_available = 0;
rc = opal_poll_events(&evt);
if (rc == OPAL_SUCCESS && evt)
- opal_do_notifier(be64_to_cpu(evt));
+ opal_handle_events(be64_to_cpu(evt));
return 1;
}
@@ -610,17 +568,6 @@ out:
return !!recover_addr;
}
-static irqreturn_t opal_interrupt(int irq, void *data)
-{
- __be64 events;
-
- opal_handle_interrupt(virq_to_hw(irq), &events);
-
- opal_do_notifier(be64_to_cpu(events));
-
- return IRQ_HANDLED;
-}
-
static int opal_sysfs_init(void)
{
opal_kobj = kobject_create_and_add("opal", firmware_kobj);
@@ -693,21 +640,13 @@ static void __init opal_dump_region_init(void)
"rc = %d\n", rc);
}
-static void opal_flash_init(struct device_node *opal_node)
+/*
+ * Create a platform device for every child of @opal_node that
+ * of_device_is_compatible() matches against @compatible.
+ * NOTE(review): the result of of_platform_device_create() is ignored, so
+ * creation failures are silent - confirm this is intended.
+ */
+static void opal_pdev_init(struct device_node *opal_node,
+ const char *compatible)
{
struct device_node *np;
for_each_child_of_node(opal_node, np)
- if (of_device_is_compatible(np, "ibm,opal-flash"))
- of_platform_device_create(np, NULL, NULL);
-}
-
-static void opal_ipmi_init(struct device_node *opal_node)
-{
- struct device_node *np;
-
- for_each_child_of_node(opal_node, np)
- if (of_device_is_compatible(np, "ibm,opal-ipmi"))
+ if (of_device_is_compatible(np, compatible))
of_platform_device_create(np, NULL, NULL);
}
@@ -719,52 +658,15 @@ static void opal_i2c_create_devs(void)
of_platform_device_create(np, NULL, NULL);
}
-static void __init opal_irq_init(struct device_node *dn)
-{
- const __be32 *irqs;
- int i, irqlen;
-
- /* Get interrupt property */
- irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
- opal_irq_count = irqs ? (irqlen / 4) : 0;
- pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);
- if (!opal_irq_count)
- return;
-
- /* Install interrupt handlers */
- opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
- for (i = 0; irqs && i < opal_irq_count; i++, irqs++) {
- unsigned int irq, virq;
- int rc;
-
- /* Get hardware and virtual IRQ */
- irq = be32_to_cpup(irqs);
- virq = irq_create_mapping(NULL, irq);
- if (virq == NO_IRQ) {
- pr_warn("Failed to map irq 0x%x\n", irq);
- continue;
- }
-
- /* Install interrupt handler */
- rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
- if (rc) {
- irq_dispose_mapping(virq);
- pr_warn("Error %d requesting irq %d (0x%x)\n",
- rc, virq, irq);
- continue;
- }
-
- /* Cache IRQ */
- opal_irqs[i] = virq;
- }
-}
-
static int kopald(void *unused)
{
+ __be64 events;
+
set_freezable();
do {
try_to_freeze();
- opal_poll_events(NULL);
+ opal_poll_events(&events);
+ opal_handle_events(be64_to_cpu(events));
msleep_interruptible(opal_heartbeat);
} while (!kthread_should_stop());
@@ -784,7 +686,7 @@ static void opal_init_heartbeat(void)
static int __init opal_init(void)
{
- struct device_node *np, *consoles;
+ struct device_node *np, *consoles, *leds;
int rc;
opal_node = of_find_node_by_path("/ibm,opal");
@@ -807,14 +709,30 @@ static int __init opal_init(void)
of_node_put(consoles);
}
+ /* Initialise OPAL messaging system */
+ opal_message_init();
+
+ /* Initialise OPAL asynchronous completion interface */
+ opal_async_comp_init();
+
+ /* Initialise OPAL sensor interface */
+ opal_sensor_init();
+
+ /* Initialise OPAL hypervisor maintenance interrupt handling */
+ opal_hmi_handler_init();
+
/* Create i2c platform devices */
opal_i2c_create_devs();
/* Setup a heatbeat thread if requested by OPAL */
opal_init_heartbeat();
- /* Find all OPAL interrupts and request them */
- opal_irq_init(opal_node);
+ /* Create leds platform devices */
+ leds = of_find_node_by_path("/ibm,opal/leds");
+ if (leds) {
+ of_platform_device_create(leds, "opal_leds", NULL);
+ of_node_put(leds);
+ }
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
@@ -835,10 +753,13 @@ static int __init opal_init(void)
opal_msglog_init();
}
- /* Initialize OPAL IPMI backend */
- opal_ipmi_init(opal_node);
+ /* Initialize platform devices: IPMI backend, PRD & flash interface */
+ opal_pdev_init(opal_node, "ibm,opal-ipmi");
+ opal_pdev_init(opal_node, "ibm,opal-flash");
+ opal_pdev_init(opal_node, "ibm,opal-prd");
- opal_flash_init(opal_node);
+ /* Initialise OPAL kmsg dumper for flushing console on panic */
+ opal_kmsg_init();
return 0;
}
@@ -846,15 +767,9 @@ machine_subsys_initcall(powernv, opal_init);
void opal_shutdown(void)
{
- unsigned int i;
long rc = OPAL_BUSY;
- /* First free interrupts, which will also mask them */
- for (i = 0; i < opal_irq_count; i++) {
- if (opal_irqs[i])
- free_irq(opal_irqs[i], NULL);
- opal_irqs[i] = 0;
- }
+ opal_event_shutdown();
/*
* Then sync with OPAL which ensure anything that can
@@ -876,11 +791,14 @@ void opal_shutdown(void)
/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_xscom_read);
+EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
+EXPORT_SYMBOL_GPL(opal_prd_msg);
/* Convert a region of vmalloc memory to an opal sg list */
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
@@ -954,6 +872,7 @@ int opal_error_code(int rc)
case OPAL_ASYNC_COMPLETION: return -EINPROGRESS;
case OPAL_BUSY_EVENT: return -EBUSY;
case OPAL_NO_MEM: return -ENOMEM;
+ case OPAL_PERMISSION: return -EPERM;
case OPAL_UNSUPPORTED: return -EIO;
case OPAL_HARDWARE: return -EIO;
@@ -970,3 +889,6 @@ EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
+/* Export these symbols for PowerNV LED class driver */
+EXPORT_SYMBOL_GPL(opal_leds_get_ind);
+EXPORT_SYMBOL_GPL(opal_leds_set_ind);
diff --git a/kernel/arch/powerpc/platforms/powernv/pci-ioda.c b/kernel/arch/powerpc/platforms/powernv/pci-ioda.c
index f8bc950ef..e40d07146 100644
--- a/kernel/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/kernel/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -23,6 +23,9 @@
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/memblock.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
#include <asm/sections.h>
#include <asm/io.h>
@@ -38,8 +41,9 @@
#include <asm/debug.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
+#include <asm/mmzone.h>
-#include <misc/cxl.h>
+#include <misc/cxl-base.h>
#include "powernv.h"
#include "pci.h"
@@ -47,6 +51,11 @@
/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8)
+#define POWERNV_IOMMU_DEFAULT_LEVELS 1
+#define POWERNV_IOMMU_MAX_LEVELS 5
+
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...)
{
@@ -131,11 +140,9 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
return;
}
- if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) {
- pr_warn("%s: PE %d was assigned on PHB#%x\n",
- __func__, pe_no, phb->hose->global_number);
- return;
- }
+ if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
+ pr_debug("%s: PE %d was reserved on PHB#%x\n",
+ __func__, pe_no, phb->hose->global_number);
phb->ioda.pe_array[pe_no].phb = phb;
phb->ioda.pe_array[pe_no].pe_number = pe_no;
@@ -222,61 +229,60 @@ fail:
return -EIO;
}
-static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb)
+/*
+ * Account for the M64 segments used by one device.
+ *
+ * Looks at resources 0..PCI_ROM_RESOURCE of @pdev, skipping any that have
+ * no parent or for which pnv_pci_is_mem_pref_64() is false.  For each
+ * remaining resource the covered segment numbers are derived from its
+ * offset to phb->ioda.m64_base and the segment size phb->ioda.m64_segsize.
+ * Each segment number is either set in @pe_bitmap or, when @pe_bitmap is
+ * NULL, passed to pnv_ioda_reserve_pe().
+ */
+static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev,
+ unsigned long *pe_bitmap)
{
- resource_size_t sgsz = phb->ioda.m64_segsize;
- struct pci_dev *pdev;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct resource *r;
- int base, step, i;
-
- /*
- * Root bus always has full M64 range and root port has
- * M64 range used in reality. So we're checking root port
- * instead of root bus.
- */
- list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
- for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
- r = &pdev->resource[PCI_BRIDGE_RESOURCES + i];
- if (!r->parent ||
- !pnv_pci_is_mem_pref_64(r->flags))
- continue;
+ resource_size_t base, sgsz, start, end;
+ int segno, i;
+
+ base = phb->ioda.m64_base;
+ sgsz = phb->ioda.m64_segsize;
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ r = &pdev->resource[i];
+ if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+ continue;
- base = (r->start - phb->ioda.m64_base) / sgsz;
- for (step = 0; step < resource_size(r) / sgsz; step++)
- pnv_ioda_reserve_pe(phb, base + step);
+ /* Widen the range to whole segments; r->end is used as given */
+ start = _ALIGN_DOWN(r->start - base, sgsz);
+ end = _ALIGN_UP(r->end - base, sgsz);
+ for (segno = start / sgsz; segno < end / sgsz; segno++) {
+ if (pe_bitmap)
+ set_bit(segno, pe_bitmap);
+ else
+ pnv_ioda_reserve_pe(phb, segno);
}
}
}
-static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
- struct pci_bus *bus, int all)
+/*
+ * Walk every device on @bus and account for its M64 segments through
+ * pnv_ioda2_reserve_dev_m64_pe().  When @all is true the walk recurses into
+ * each device's subordinate bus as well.  @pe_bitmap is handed through
+ * unchanged.
+ */
+static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus,
+ unsigned long *pe_bitmap,
+ bool all)
{
- resource_size_t segsz = phb->ioda.m64_segsize;
struct pci_dev *pdev;
- struct resource *r;
+
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap);
+
+ if (all && pdev->subordinate)
+ pnv_ioda2_reserve_m64_pe(pdev->subordinate,
+ pe_bitmap, all);
+ }
+}
+
+static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pnv_phb *phb = hose->private_data;
struct pnv_ioda_pe *master_pe, *pe;
unsigned long size, *pe_alloc;
- bool found;
- int start, i, j;
+ int i;
/* Root bus shouldn't use M64 */
if (pci_is_root_bus(bus))
return IODA_INVALID_PE;
- /* We support only one M64 window on each bus */
- found = false;
- pci_bus_for_each_resource(bus, r, i) {
- if (r && r->parent &&
- pnv_pci_is_mem_pref_64(r->flags)) {
- found = true;
- break;
- }
- }
-
- /* No M64 window found ? */
- if (!found)
- return IODA_INVALID_PE;
-
/* Allocate bitmap */
size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
pe_alloc = kzalloc(size, GFP_KERNEL);
@@ -286,35 +292,8 @@ static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
return IODA_INVALID_PE;
}
- /*
- * Figure out reserved PE numbers by the PE
- * the its child PEs.
- */
- start = (r->start - phb->ioda.m64_base) / segsz;
- for (i = 0; i < resource_size(r) / segsz; i++)
- set_bit(start + i, pe_alloc);
-
- if (all)
- goto done;
-
- /*
- * If the PE doesn't cover all subordinate buses,
- * we need subtract from reserved PEs for children.
- */
- list_for_each_entry(pdev, &bus->devices, bus_list) {
- if (!pdev->subordinate)
- continue;
-
- pci_bus_for_each_resource(pdev->subordinate, r, i) {
- if (!r || !r->parent ||
- !pnv_pci_is_mem_pref_64(r->flags))
- continue;
-
- start = (r->start - phb->ioda.m64_base) / segsz;
- for (j = 0; j < resource_size(r) / segsz ; j++)
- clear_bit(start + j, pe_alloc);
- }
- }
+ /* Figure out reserved PE numbers by the PE */
+ pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all);
/*
* the current bus might not own M64 window and that's all
@@ -330,7 +309,6 @@ static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
* Figure out the master PE and put all slave PEs to master
* PE's list to form compound PE.
*/
-done:
master_pe = NULL;
i = -1;
while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
@@ -644,7 +622,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
pdev = pe->pdev->bus->self;
#ifdef CONFIG_PCI_IOV
else if (pe->flags & PNV_IODA_PE_VF)
- pdev = pe->parent_dev->bus->self;
+ pdev = pe->parent_dev;
#endif /* CONFIG_PCI_IOV */
while (pdev) {
struct pci_dn *pdn = pci_get_pdn(pdev);
@@ -723,7 +701,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
parent = parent->bus->self;
}
- opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
+ opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
/* Disassociate PE in PELT */
@@ -937,8 +915,9 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
res2 = *res;
res->start += size * offset;
- dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
- i, &res2, res, num_vfs, offset);
+ dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+ i, &res2, res, (offset > 0) ? "En" : "Dis",
+ num_vfs, offset);
pci_update_resource(dev, i + PCI_IOV_RESOURCES);
}
return 0;
@@ -1041,7 +1020,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
* subordinate PCI devices and buses. The second type of PE is normally
* orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
*/
-static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
+static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
struct pci_controller *hose = pci_bus_to_host(bus);
struct pnv_phb *phb = hose->private_data;
@@ -1050,7 +1029,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
/* Check if PE is determined by M64 */
if (phb->pick_m64_pe)
- pe_num = phb->pick_m64_pe(phb, bus, all);
+ pe_num = phb->pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
if (pe_num == IODA_INVALID_PE)
@@ -1086,10 +1065,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
return;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Associate it with all child devices */
pnv_ioda_setup_same_PE(bus, pe);
@@ -1112,12 +1087,12 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
{
struct pci_dev *dev;
- pnv_ioda_setup_bus_PE(bus, 0);
+ pnv_ioda_setup_bus_PE(bus, false);
list_for_each_entry(dev, &bus->devices, bus_list) {
if (dev->subordinate) {
if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
- pnv_ioda_setup_bus_PE(dev->subordinate, 1);
+ pnv_ioda_setup_bus_PE(dev->subordinate, true);
else
pnv_ioda_setup_PEs(dev->subordinate);
}
@@ -1142,7 +1117,7 @@ static void pnv_pci_ioda_setup_PEs(void)
/* M64 layout might affect PE allocation */
if (phb->reserve_m64_pe)
- phb->reserve_m64_pe(phb);
+ phb->reserve_m64_pe(hose->bus, NULL, true);
pnv_ioda_setup_PEs(hose->bus);
}
@@ -1283,36 +1258,27 @@ m64_failed:
return -EBUSY;
}
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+ int num);
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+
+/*
+ * Tear down the DMA setup of @pe: unset window 0 of its table group
+ * (warning on error), call pnv_pci_ioda2_set_bypass(pe, false), drop the
+ * IOMMU group reference if one is attached, then free the TCE table pages
+ * and the iommu_table itself.  @dev only supplies the OF node name passed
+ * to iommu_free_table().
+ * NOTE(review): the BUG_ON() presumes iommu_group_put() ends up clearing
+ * pe->table_group.group (presumably via a release callback) - confirm.
+ */
static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
{
- struct pci_bus *bus;
- struct pci_controller *hose;
- struct pnv_phb *phb;
struct iommu_table *tbl;
- unsigned long addr;
int64_t rc;
- bus = dev->bus;
- hose = pci_bus_to_host(bus);
- phb = hose->private_data;
- tbl = pe->tce32_table;
- addr = tbl->it_base;
-
- opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
- pe->pe_number << 1, 1, __pa(addr),
- 0, 0x1000);
-
- rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
- pe->pe_number,
- (pe->pe_number << 1) + 1,
- pe->tce_bypass_base,
- 0);
+ tbl = pe->table_group.tables[0];
+ rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pnv_pci_ioda2_set_bypass(pe, false);
+ if (pe->table_group.group) {
+ iommu_group_put(pe->table_group.group);
+ BUG_ON(pe->table_group.group);
+ }
+ pnv_pci_ioda2_table_free_pages(tbl);
iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
- free_pages(addr, get_order(TCE32_TABLE_SIZE));
- pe->tce32_table = NULL;
}
static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -1460,10 +1426,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
continue;
}
- pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
- GFP_KERNEL, hose->node);
- pe->tce32_table->data = pe;
-
/* Put PE to the list */
mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
@@ -1598,12 +1560,20 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table);
+ set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+ /*
+ * Note: iommu_add_device() will fail here as
+ * for physical PE: the device is already added by now;
+ * for virtual PE: sysfs entries are not ready yet and
+ * tce_iommu_bus_notifier will add the device to a group later.
+ */
}
-static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
- struct pci_dev *pdev, u64 dma_mask)
+static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
uint64_t top;
@@ -1621,19 +1591,18 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
if (bypass) {
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(&pdev->dev, &dma_direct_ops);
- set_dma_offset(&pdev->dev, pe->tce_bypass_base);
} else {
dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
set_dma_ops(&pdev->dev, &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, pe->tce32_table);
}
*pdev->dev.dma_mask = dma_mask;
return 0;
}
-static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
- struct pci_dev *pdev)
+static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
struct pci_dn *pdn = pci_get_pdn(pdev);
struct pnv_ioda_pe *pe;
u64 end, mask;
@@ -1654,36 +1623,37 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb,
}
+/*
+ * Point every device on @bus at the PE's first TCE table and its bypass
+ * DMA offset, and call iommu_add_device() for it.  Subordinate buses are
+ * visited only when the PE has PNV_IODA_PE_BUS_ALL set in its flags.
+ */
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus,
- bool add_to_iommu_group)
+ struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
- if (add_to_iommu_group)
- set_iommu_table_base_and_group(&dev->dev,
- pe->tce32_table);
- else
- set_iommu_table_base(&dev->dev, pe->tce32_table);
+ set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+ set_dma_offset(&dev->dev, pe->tce_bypass_base);
+ iommu_add_device(&dev->dev);
- if (dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
- add_to_iommu_group);
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
}
-static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
+ struct iommu_table_group_link *tgl = list_first_entry_or_null(
+ &tbl->it_group_list, struct iommu_table_group_link,
+ next);
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
__be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
unsigned long start, end, inc;
const unsigned shift = tbl->it_page_shift;
- start = __pa(startp);
- end = __pa(endp);
+ start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
+ end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
+ npages - 1);
/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
if (tbl->it_busno) {
@@ -1719,26 +1689,79 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
*/
}
-static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
- struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+/*
+ * .set callback of pnv_ioda1_iommu_ops.
+ *
+ * Writes @npages TCEs starting at @index through pnv_tce_build().  When that
+ * succeeds and the table has TCE_PCI_SWINV_CREATE set in it_type, the same
+ * range is invalidated with pnv_pci_ioda1_tce_invalidate() (rm == false).
+ *
+ * Returns the value returned by pnv_tce_build().
+ */
+static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
+		long npages, unsigned long uaddr,
+		enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	int rc;
+
+	rc = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs);
+	if (rc)
+		return rc;
+
+	/* Entries were written; flush only if this table asks for it */
+	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
+		pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+
+	return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * .exchange callback of pnv_ioda1_iommu_ops (only built with
+ * CONFIG_IOMMU_API).
+ *
+ * Swaps the single TCE at @index through pnv_tce_xchg().  On success, and if
+ * the table has either TCE_PCI_SWINV_CREATE or TCE_PCI_SWINV_FREE set, that
+ * one entry is invalidated.
+ *
+ * Returns the value returned by pnv_tce_xchg().
+ */
+static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	long rc = pnv_tce_xchg(tbl, index, hpa, direction);
+
+	if (rc)
+		return rc;
+
+	if (tbl->it_type & (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))
+		pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false);
+
+	return rc;
+}
+#endif
+
+/*
+ * .clear callback of pnv_ioda1_iommu_ops.
+ *
+ * Clears @npages TCEs starting at @index through pnv_tce_free() and then
+ * invalidates the range when the table has TCE_PCI_SWINV_FREE set.
+ */
+static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
+		long npages)
+{
+	pnv_tce_free(tbl, index, npages);
+
+	if (!(tbl->it_type & TCE_PCI_SWINV_FREE))
+		return;
+
+	pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false);
+}
+
+/*
+ * TCE table callbacks installed on IODA1 tables (assigned to tbl->it_ops in
+ * pnv_pci_ioda_setup_dma_pe()).  .set/.exchange/.clear are the wrappers above
+ * that add TCE invalidation; .get goes straight to pnv_tce_get().
+ * .exchange is only present when CONFIG_IOMMU_API is enabled.
+ */
+static struct iommu_table_ops pnv_ioda1_iommu_ops = {
+ .set = pnv_ioda1_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda1_tce_xchg,
+#endif
+ .clear = pnv_ioda1_tce_free,
+ .get = pnv_tce_get,
+};
+
+/*
+ * Write a single "invalidate by PE#" command for @pe to the PHB's TCE
+ * invalidate register.  Does nothing when the PHB has no mapped
+ * tce_inval_reg.
+ */
+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+	unsigned long cmd;
+
+	if (!phb->ioda.tce_inval_reg)
+		return;
+
+	/* 01xb - invalidate TCEs that match the specified PE# */
+	cmd = (0x4ull << 60) | (pe->pe_number & 0xFF);
+
+	mb(); /* Ensure above stores are visible */
+	__raw_writeq(cpu_to_be64(cmd), phb->ioda.tce_inval_reg);
+}
+
+static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
+ __be64 __iomem *invalidate, unsigned shift,
+ unsigned long index, unsigned long npages)
{
unsigned long start, end, inc;
- __be64 __iomem *invalidate = rm ?
- (__be64 __iomem *)pe->tce_inval_reg_phys :
- (__be64 __iomem *)tbl->it_index;
- const unsigned shift = tbl->it_page_shift;
/* We'll invalidate DMA address in PE scope */
start = 0x2ull << 60;
- start |= (pe->pe_number & 0xFF);
+ start |= (pe_number & 0xFF);
end = start;
/* Figure out the start, end and step */
- inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
- start |= (inc << shift);
- inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
- end |= (inc << shift);
+ start |= (index << shift);
+ end |= ((index + npages - 1) << shift);
inc = (0x1ull << shift);
mb();
@@ -1751,25 +1774,83 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
}
}
-void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
- __be64 *startp, __be64 *endp, bool rm)
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages, bool rm)
{
- struct pnv_ioda_pe *pe = tbl->data;
- struct pnv_phb *phb = pe->phb;
+ struct iommu_table_group_link *tgl;
- if (phb->type == PNV_PHB_IODA1)
- pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
- else
- pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
+ __be64 __iomem *invalidate = rm ?
+ (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
+ pe->phb->ioda.tce_inval_reg;
+
+ pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
+ invalidate, tbl->it_page_shift,
+ index, npages);
+ }
}
+/*
+ * .set callback of pnv_ioda2_iommu_ops.
+ *
+ * Delegates the TCE writes to pnv_tce_build(); if they succeed and
+ * TCE_PCI_SWINV_CREATE is set in tbl->it_type, the written range is passed
+ * to pnv_pci_ioda2_tce_invalidate() (rm == false).
+ *
+ * Returns the value returned by pnv_tce_build().
+ */
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+		long npages, unsigned long uaddr,
+		enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	int rc;
+
+	rc = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs);
+	if (rc)
+		return rc;
+
+	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
+		pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+
+	return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * .exchange callback of pnv_ioda2_iommu_ops (only built with
+ * CONFIG_IOMMU_API).
+ *
+ * Exchanges the TCE at @index via pnv_tce_xchg() and, on success, invalidates
+ * that single entry if the table has TCE_PCI_SWINV_CREATE or
+ * TCE_PCI_SWINV_FREE set.
+ *
+ * Returns the value returned by pnv_tce_xchg().
+ */
+static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	long rc = pnv_tce_xchg(tbl, index, hpa, direction);
+
+	if (rc)
+		return rc;
+
+	if (tbl->it_type & (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))
+		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);
+
+	return rc;
+}
+#endif
+
+/*
+ * .clear callback of pnv_ioda2_iommu_ops.
+ *
+ * Clears the TCEs with pnv_tce_free(), then invalidates the same range when
+ * TCE_PCI_SWINV_FREE is set in tbl->it_type.
+ */
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+		long npages)
+{
+	pnv_tce_free(tbl, index, npages);
+
+	if (!(tbl->it_type & TCE_PCI_SWINV_FREE))
+		return;
+
+	pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+}
+
+/*
+ * .free callback of pnv_ioda2_iommu_ops: first releases the pages backing
+ * @tbl via pnv_pci_ioda2_table_free_pages(), then hands @tbl itself to
+ * iommu_free_table().
+ */
+static void pnv_ioda2_table_free(struct iommu_table *tbl)
+{
+ pnv_pci_ioda2_table_free_pages(tbl);
+ iommu_free_table(tbl, "pnv");
+}
+
+/*
+ * TCE table callbacks installed on IODA2 tables (assigned to tbl->it_ops in
+ * pnv_pci_ioda2_create_table()).  Unlike the IODA1 ops this also provides
+ * .free, which releases the table pages and the table itself.
+ * .exchange is only present when CONFIG_IOMMU_API is enabled.
+ */
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_ioda2_tce_xchg,
+#endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+ .free = pnv_ioda2_table_free,
+};
+
static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, unsigned int base,
unsigned int segs)
{
struct page *tce_mem = NULL;
- const __be64 *swinvp;
struct iommu_table *tbl;
unsigned int i;
int64_t rc;
@@ -1783,6 +1864,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
if (WARN_ON(pe->tce32_seg >= 0))
return;
+ tbl = pnv_pci_table_alloc(phb->hose->node);
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+ pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
+
/* Grab a 32-bit TCE table */
pe->tce32_seg = base;
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
@@ -1817,39 +1903,30 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
base << 28, IOMMU_PAGE_SHIFT_4K);
/* OPAL variant of P7IOC SW invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
+ if (phb->ioda.tce_inval_reg)
tbl->it_type |= (TCE_PCI_SWINV_CREATE |
TCE_PCI_SWINV_FREE |
TCE_PCI_SWINV_PAIR);
- }
+
+ tbl->it_ops = &pnv_ioda1_iommu_ops;
+ pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
+ pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
iommu_init_table(tbl, phb->hose->node);
if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
+ /*
+ * Setting table base here only for carrying iommu_group
+ * further down to let iommu_add_device() do the job.
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+ */
+ set_iommu_table_base(&pe->pdev->dev, tbl);
+ iommu_add_device(&pe->pdev->dev);
+ } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
return;
fail:
@@ -1858,11 +1935,53 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
pe->tce32_seg = -1;
if (tce_mem)
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
+ if (tbl) {
+ pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
+ iommu_free_table(tbl, "pnv");
+ }
+}
+
+/*
+ * Program DMA window @num of the PE that embeds @table_group with @tbl.
+ *
+ * Maps the table through opal_pci_map_pe_dma_window(); on success links @tbl
+ * into @table_group at slot @num and invalidates all TCEs of the PE.
+ *
+ * Returns 0 on success or the non-zero OPAL return code on failure (in which
+ * case the table is not linked).
+ */
+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
+		int num, struct iommu_table *tbl)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	struct pnv_phb *phb = pe->phb;
+	int64_t rc;
+	/* For a multi-level table OPAL is given the size of one level */
+	const unsigned long size = tbl->it_indirect_levels ?
+		tbl->it_level_size : tbl->it_size;
+	const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+	const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+	/*
+	 * The page size is cast explicitly so the specifier is right whatever
+	 * integer type IOMMU_PAGE_SIZE() expands to.
+	 */
+	pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", num,
+			start_addr, start_addr + win_size - 1,
+			(unsigned long)IOMMU_PAGE_SIZE(tbl));
+
+	/*
+	 * Map TCE table through TVT. The TVE index is the PE number
+	 * shifted by 1 bit for 32-bits DMA space.
+	 */
+	rc = opal_pci_map_pe_dma_window(phb->opal_id,
+			pe->pe_number,
+			(pe->pe_number << 1) + num,
+			tbl->it_indirect_levels + 1,
+			__pa(tbl->it_base),
+			size << 3,
+			IOMMU_PAGE_SIZE(tbl));
+	if (rc) {
+		/* rc is int64_t: print it as long long, as set_bypass does */
+		pe_err(pe, "Failed to configure TCE table, err %lld\n",
+				(long long)rc);
+		return rc;
+	}
+
+	pnv_pci_link_table_and_group(phb->hose->node, num,
+			tbl, &pe->table_group);
+	pnv_pci_ioda2_tce_invalidate_entire(pe);
+
+	return 0;
}
-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
- struct pnv_ioda_pe *pe = tbl->data;
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
@@ -1882,17 +2001,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
window_id,
pe->tce_bypass_base,
0);
-
- /*
- * EEH needs the mapping between IOMMU table and group
- * of those VFIO/KVM pass-through devices. We can postpone
- * resetting DMA ops until the DMA mask is configured in
- * host side.
- */
- if (pe->pdev)
- set_iommu_table_base(&pe->pdev->dev, tbl);
- else
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
if (rc)
pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
@@ -1900,106 +2008,378 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
pe->tce_bypass_enabled = enable;
}
-static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table *tbl);
+
+static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
{
- /* TVE #1 is selected by PCI address bit 59 */
- pe->tce_bypass_base = 1ull << 59;
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ int nid = pe->phb->hose->node;
+ __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start;
+ long ret;
+ struct iommu_table *tbl;
+
+ tbl = pnv_pci_table_alloc(nid);
+ if (!tbl)
+ return -ENOMEM;
+
+ ret = pnv_pci_ioda2_table_alloc_pages(nid,
+ bus_offset, page_shift, window_size,
+ levels, tbl);
+ if (ret) {
+ iommu_free_table(tbl, "pnv");
+ return ret;
+ }
+
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
+ if (pe->phb->ioda.tce_inval_reg)
+ tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
- /* Install set_bypass callback for VFIO */
- pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass;
+ *ptbl = tbl;
- /* Enable bypass by default */
- pnv_pci_ioda2_set_bypass(pe->tce32_table, true);
+ return 0;
}
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
- struct pnv_ioda_pe *pe)
+/*
+ * Create and install the default 32-bit DMA window (window #0, 4K IOMMU
+ * pages, POWERNV_IOMMU_DEFAULT_LEVELS levels) for @pe, and enable the bypass
+ * window unless pnv_iommu_bypass_disabled is set.
+ *
+ * Returns 0 on success, or the error from table creation / window setup.
+ * If window setup fails the freshly created table is freed again.
+ */
+static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table *tbl = NULL;
+	long rc;
+
+	/*
+	 * crashkernel= specifies the kdump kernel's maximum memory at
+	 * some offset and there is no guarantee that the result is a
+	 * power of 2, which will cause errors later.
+	 */
+	const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max());
+
+	/*
+	 * In memory constrained environments, e.g. kdump kernel, the
+	 * DMA window can be larger than available memory, which will
+	 * cause errors later.
+	 */
+	const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory);
+
+	rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
+			IOMMU_PAGE_SHIFT_4K,
+			window_size,
+			POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
+	if (rc) {
+		/* Terminate the message so it does not run into the next one */
+		pe_err(pe, "Failed to create 32-bit TCE table, err %ld\n",
+				rc);
+		return rc;
+	}
+
+	iommu_init_table(tbl, pe->phb->hose->node);
+
+	rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+	if (rc) {
+		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
+				rc);
+		pnv_ioda2_table_free(tbl);
+		return rc;
+	}
+
+	if (!pnv_iommu_bypass_disabled)
+		pnv_pci_ioda2_set_bypass(pe, true);
+
+	/* OPAL variant of PHB3 invalidated TCEs */
+	if (pe->phb->ioda.tce_inval_reg)
+		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+
+	/*
+	 * Setting table base here only for carrying iommu_group
+	 * further down to let iommu_add_device() do the job.
+	 * pnv_pci_ioda_dma_dev_setup will override it later anyway.
+	 */
+	if (pe->flags & PNV_IODA_PE_DEV)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+
+	return 0;
+}
+
+#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
+/*
+ * Tear down DMA window @num of the PE that embeds @table_group.
+ *
+ * Asks OPAL to map the window with all-zero table parameters.  On success
+ * every TCE of the PE is invalidated; on failure a warning is logged.  In
+ * both cases table_group->tables[@num] is unlinked from the group.
+ *
+ * Returns the OPAL return code (0 on success).
+ */
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+		int num)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	long rc;
+
+	pe_info(pe, "Removing DMA window #%d\n", num);
+
+	rc = opal_pci_map_pe_dma_window(pe->phb->opal_id, pe->pe_number,
+			(pe->pe_number << 1) + num,
+			0/* levels */, 0/* table address */,
+			0/* table size */, 0/* page size */);
+	if (!rc)
+		pnv_pci_ioda2_tce_invalidate_entire(pe);
+	else
+		pe_warn(pe, "Unmapping failed, ret = %ld\n", rc);
+
+	/* The table is unlinked whether or not the unmap succeeded */
+	pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+
+	return rc;
+}
+#endif
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Compute how many bytes a TCE table for a window of @window_size bytes with
+ * IOMMU pages of 2^@page_shift bytes and @levels levels would occupy.
+ *
+ * Returns 0 when the parameters are not acceptable: @levels is 0 or above
+ * POWERNV_IOMMU_MAX_LEVELS, @window_size exceeds memory_hotplug_max(), is not
+ * a power of two, or is smaller than a single IOMMU page.
+ */
+static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+		__u64 window_size, __u32 levels)
+{
+	unsigned long bytes = 0;
+	unsigned window_shift, entries_shift, table_shift;
+	unsigned long tce_table_size, direct_table_size;
+
+	/*
+	 * Validate before doing any arithmetic: ilog2() and the shifts below
+	 * are only meaningful for a non-zero power-of-two window.
+	 */
+	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+			(window_size > memory_hotplug_max()) ||
+			!is_power_of_2(window_size))
+		return 0;
+
+	window_shift = ilog2(window_size);
+	/* A window smaller than one page would underflow entries_shift */
+	if (window_shift < page_shift)
+		return 0;
+
+	entries_shift = window_shift - page_shift;
+	table_shift = entries_shift + 3;
+	tce_table_size = max(0x1000UL, 1UL << table_shift);
+
+	/* Calculate a direct table size from window_size and levels */
+	entries_shift = (entries_shift + levels - 1) / levels;
+	table_shift = entries_shift + 3;
+	table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
+	direct_table_size = 1UL << table_shift;
+
+	/* Sum the size of every level, each rounded up to whole tables */
+	for ( ; levels; --levels) {
+		bytes += _ALIGN_UP(tce_table_size, direct_table_size);
+
+		tce_table_size /= direct_table_size;
+		tce_table_size <<= 3;
+		tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size);
+	}
+
+	return bytes;
+}
+
+/*
+ * .take_ownership callback of pnv_pci_ioda2_ops: disables the bypass window,
+ * removes DMA window #0 and frees the table that was installed there.
+ */
+static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+{
+	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+			table_group);
+	/* Remember the table first: pnv_pci_ioda2_unset_window() resets it */
+	struct iommu_table *window0 = table_group->tables[0];
+
+	pnv_pci_ioda2_set_bypass(pe, false);
+	pnv_pci_ioda2_unset_window(table_group, 0);
+	pnv_ioda2_table_free(window0);
+}
+
+/*
+ * .release_ownership callback of pnv_pci_ioda2_ops: re-creates the default
+ * DMA configuration of the PE that embeds @table_group.  The result of
+ * pnv_pci_ioda2_setup_default_config() is not checked.
+ */
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+	pnv_pci_ioda2_setup_default_config(container_of(table_group,
+				struct pnv_ioda_pe, table_group));
+}
+
+/*
+ * Table-group callbacks for IODA2 PEs; installed as pe->table_group.ops by
+ * pnv_pci_ioda2_setup_dma_pe().  Lives inside the surrounding
+ * CONFIG_IOMMU_API block, so it only exists when that option is enabled.
+ */
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table,
+ .set_window = pnv_pci_ioda2_set_window,
+ .unset_window = pnv_pci_ioda2_unset_window,
+ .take_ownership = pnv_ioda2_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb)
{
- struct page *tce_mem = NULL;
- void *addr;
const __be64 *swinvp;
- struct iommu_table *tbl;
- unsigned int tce_table_size, end;
- int64_t rc;
- /* We shouldn't already have a 32-bit DMA associated */
- if (WARN_ON(pe->tce32_seg >= 0))
+ /* OPAL variant of PHB3 invalidated TCEs */
+ swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+ if (!swinvp)
return;
- /* The PE will reserve all possible 32-bits space */
- pe->tce32_seg = 0;
- end = (1 << ilog2(phb->ioda.m32_pci_base));
- tce_table_size = (end / 0x1000) * 8;
- pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
- end);
+ phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp);
+ phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8);
+}
- /* Allocate TCE table */
- tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
- get_order(tce_table_size));
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift,
+ unsigned levels, unsigned long limit,
+ unsigned long *current_offset, unsigned long *total_allocated)
+{
+ struct page *tce_mem = NULL;
+ __be64 *addr, *tmp;
+ unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+ unsigned long allocated = 1UL << (order + PAGE_SHIFT);
+ unsigned entries = 1UL << (shift - 3);
+ long i;
+
+ tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
if (!tce_mem) {
- pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
- goto fail;
+ pr_err("Failed to allocate a TCE memory, order=%d\n", order);
+ return NULL;
}
addr = page_address(tce_mem);
- memset(addr, 0, tce_table_size);
+ memset(addr, 0, allocated);
+ *total_allocated += allocated;
+
+ --levels;
+ if (!levels) {
+ *current_offset += allocated;
+ return addr;
+ }
+
+ for (i = 0; i < entries; ++i) {
+ tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+ levels, limit, current_offset, total_allocated);
+ if (!tmp)
+ break;
+
+ addr[i] = cpu_to_be64(__pa(tmp) |
+ TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (*current_offset >= limit)
+ break;
+ }
+
+ return addr;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned level);
+
+static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table *tbl)
+{
+ void *addr;
+ unsigned long offset = 0, level_shift, total_allocated = 0;
+ const unsigned window_shift = ilog2(window_size);
+ unsigned entries_shift = window_shift - page_shift;
+ unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT);
+ const unsigned long tce_table_size = 1UL << table_shift;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+
+ if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+ return -EINVAL;
+
+ /* Adjust direct table size from window_size and levels */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ level_shift = entries_shift + 3;
+ level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+
+ /* Allocate TCE table */
+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ levels, tce_table_size, &offset, &total_allocated);
+
+ /* addr==NULL means that the first level allocation failed */
+ if (!addr)
+ return -ENOMEM;
/*
- * Map TCE table through TVT. The TVE index is the PE number
- * shifted by 1 bit for 32-bits DMA space.
+ * First level was allocated but some lower level failed as
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
*/
- rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
- pe->pe_number << 1, 1, __pa(addr),
- tce_table_size, 0x1000);
- if (rc) {
- pe_err(pe, "Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
- goto fail;
+ if (offset < tce_table_size) {
+ pnv_pci_ioda2_table_do_free_pages(addr,
+ 1ULL << (level_shift - 3), levels - 1);
+ return -ENOMEM;
}
/* Setup linux iommu table */
- tbl = pe->tce32_table;
- pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
- IOMMU_PAGE_SHIFT_4K);
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+ page_shift);
+ tbl->it_level_size = 1ULL << (level_shift - 3);
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_allocated_size = total_allocated;
- /* OPAL variant of PHB3 invalidated TCEs */
- swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
- if (swinvp) {
- /* We need a couple more fields -- an address and a data
- * to or. Since the bus is only printed out on table free
- * errors, and on the first pass the data will be a relative
- * bus number, print that out instead.
- */
- pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
- tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
- 8);
- tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
+ pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
+ window_size, tce_table_size, bus_offset);
+
+ return 0;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned level)
+{
+ const unsigned long addr_ul = (unsigned long) addr &
+ ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (level) {
+ long i;
+ u64 *tmp = (u64 *) addr_ul;
+
+ for (i = 0; i < size; ++i) {
+ unsigned long hpa = be64_to_cpu(tmp[i]);
+
+ if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ continue;
+
+ pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+ level - 1);
+ }
}
- iommu_init_table(tbl, phb->hose->node);
- if (pe->flags & PNV_IODA_PE_DEV) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
- } else if (pe->flags & PNV_IODA_PE_VF) {
- iommu_register_group(tbl, phb->hose->global_number,
- pe->pe_number);
- }
-
- /* Also create a bypass window */
- if (!pnv_iommu_bypass_disabled)
- pnv_pci_ioda2_setup_bypass_pe(phb, pe);
+ free_pages(addr_ul, get_order(size << 3));
+}
- return;
-fail:
- if (pe->tce32_seg >= 0)
- pe->tce32_seg = -1;
- if (tce_mem)
- __free_pages(tce_mem, get_order(tce_table_size));
+/*
+ * Release the pages backing @tbl.  A table with it_size == 0 is left alone.
+ * For a multi-level table the per-level entry count (it_level_size) is passed
+ * down, otherwise the full it_size.
+ */
+static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+	unsigned long entries;
+
+	if (!tbl->it_size)
+		return;
+
+	if (tbl->it_indirect_levels)
+		entries = tbl->it_level_size;
+	else
+		entries = tbl->it_size;
+
+	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, entries,
+			tbl->it_indirect_levels);
+}
+
+/*
+ * Set up DMA for an IODA2 PE: register its iommu group, describe the 32-bit
+ * window and the table-group capabilities, install the default configuration
+ * and finally attach the PE's device (or every device on its bus) to the
+ * group.  On failure of the default configuration tce32_seg is reset to -1
+ * and no device is attached.
+ */
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+		struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *grp = &pe->table_group;
+
+	/* We shouldn't already have a 32-bit DMA associated */
+	if (WARN_ON(pe->tce32_seg >= 0))
+		return;
+
+	/* TVE #1 is selected by PCI address bit 59 */
+	pe->tce_bypass_base = 1ull << 59;
+
+	iommu_register_group(grp, phb->hose->global_number,
+			pe->pe_number);
+
+	/* The PE will reserve all possible 32-bits space */
+	pe->tce32_seg = 0;
+	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+			phb->ioda.m32_pci_base);
+
+	/* Setup linux iommu table */
+	grp->tce32_start = 0;
+	grp->tce32_size = phb->ioda.m32_pci_base;
+	grp->max_dynamic_windows_supported = IOMMU_TABLE_GROUP_MAX_TABLES;
+	grp->max_levels = POWERNV_IOMMU_MAX_LEVELS;
+	grp->pgsizes = SZ_4K | SZ_64K | SZ_16M;
+#ifdef CONFIG_IOMMU_API
+	grp->ops = &pnv_pci_ioda2_ops;
+#endif
+
+	if (pnv_pci_ioda2_setup_default_config(pe)) {
+		if (pe->tce32_seg >= 0)
+			pe->tce32_seg = -1;
+		return;
+	}
+
+	if (pe->flags & PNV_IODA_PE_DEV) {
+		iommu_add_device(&pe->pdev->dev);
+		return;
+	}
+
+	if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
static void pnv_ioda_setup_dma(struct pnv_phb *phb)
@@ -2024,6 +2404,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
pr_info("PCI: %d PE# for a total weight of %d\n",
phb->ioda.dma_pe_count, phb->ioda.dma_weight);
+ pnv_pci_ioda_setup_opal_tce_kill(phb);
+
/* Walk our PE list and configure their DMA segments, hand them
* out one base segment plus any residual segments based on
* weight
@@ -2642,12 +3024,29 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
-static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
+ struct pnv_phb *phb = hose->private_data;
+
opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
OPAL_ASSERT_RESET);
}
+/*
+ * PCI controller callbacks for IODA PHBs; copied into hose->controller_ops
+ * by pnv_pci_init_ioda_phb().  The MSI hooks are only present when
+ * CONFIG_PCI_MSI is enabled.
+ */
+static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_dma_bus_setup,
+#ifdef CONFIG_PCI_MSI
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .dma_set_mask = pnv_pci_ioda_dma_set_mask,
+ .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
static void __init pnv_pci_init_ioda_phb(struct device_node *np,
u64 hub_id, int ioda_type)
{
@@ -2791,11 +3190,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
- phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
- phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask;
-
- /* Setup shutdown function for kexec */
- phb->shutdown = pnv_pci_ioda_shutdown;
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
@@ -2808,10 +3202,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
* the child P2P bridges) can form individual PE.
*/
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
- pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
- pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
- pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
- hose->controller_ops = pnv_pci_controller_ops;
+ hose->controller_ops = pnv_pci_ioda_controller_ops;
#ifdef CONFIG_PCI_IOV
ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
diff --git a/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 4729ca793..f2bdfea3b 100644
--- a/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -83,18 +83,42 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
+/*
+ * TCE table callbacks for P5IOC2 (assigned to tbl->it_ops in
+ * pnv_pci_p5ioc2_dma_dev_setup()).  They point directly at the generic
+ * pnv_tce_* helpers, with no invalidation wrappers.  .exchange is only
+ * present when CONFIG_IOMMU_API is enabled.
+ */
+static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
+ .set = pnv_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .exchange = pnv_tce_xchg,
+#endif
+ .clear = pnv_tce_free,
+ .get = pnv_tce_get,
+};
+
static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
struct pci_dev *pdev)
{
- if (phb->p5ioc2.iommu_table.it_map == NULL) {
- iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
- iommu_register_group(&phb->p5ioc2.iommu_table,
+ struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
+
+ if (!tbl->it_map) {
+ tbl->it_ops = &pnv_p5ioc2_iommu_ops;
+ iommu_init_table(tbl, phb->hose->node);
+ iommu_register_group(&phb->p5ioc2.table_group,
pci_domain_nr(phb->hose->bus), phb->opal_id);
+ INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+ pnv_pci_link_table_and_group(phb->hose->node, 0,
+ tbl, &phb->p5ioc2.table_group);
}
- set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
+ set_iommu_table_base(&pdev->dev, tbl);
+ iommu_add_device(&pdev->dev);
}
+/*
+ * PCI controller callbacks for P5IOC2 PHBs; copied into
+ * phb->hose->controller_ops by pnv_pci_init_p5ioc2_phb().  Only DMA device
+ * setup and, with CONFIG_PCI_MSI, the MSI hooks are provided.
+ */
+static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
+ .dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+ .setup_msi_irqs = pnv_setup_msi_irqs,
+ .teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+};
+
static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
void *tce_mem, u64 tce_size)
{
@@ -103,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
u64 phb_id;
int64_t rc;
static int primary = 1;
+ struct iommu_table_group *table_group;
+ struct iommu_table *tbl;
pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
@@ -133,7 +159,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
phb->hose->first_busno = 0;
phb->hose->last_busno = 0xff;
phb->hose->private_data = phb;
- phb->hose->controller_ops = pnv_pci_controller_ops;
+ phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
phb->hub_id = hub_id;
phb->opal_id = phb_id;
phb->type = PNV_PHB_P5IOC2;
@@ -172,6 +198,15 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
tce_mem, tce_size, 0,
IOMMU_PAGE_SHIFT_4K);
+ /*
+ * We do not allocate iommu_table as we do not support
+ * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
+ * should not be called for phb->p5ioc2.table_group.tables[0] ever.
+ */
+ tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
+ table_group = &phb->p5ioc2.table_group;
+ table_group->tce32_start = tbl->it_offset << tbl->it_page_shift;
+ table_group->tce32_size = tbl->it_size << tbl->it_page_shift;
}
void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
diff --git a/kernel/arch/powerpc/platforms/powernv/pci.c b/kernel/arch/powerpc/platforms/powernv/pci.c
index bca2aeb6e..ad8c3f4a5 100644
--- a/kernel/arch/powerpc/platforms/powernv/pci.c
+++ b/kernel/arch/powerpc/platforms/powernv/pci.c
@@ -45,7 +45,7 @@
//#define cfg_dbg(fmt...) printk(fmt)
#ifdef CONFIG_PCI_MSI
-static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
@@ -61,7 +61,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
if (pdev->no_64bit_msi && !phb->msi32_support)
return -ENODEV;
- list_for_each_entry(entry, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(entry, pdev) {
if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
pr_warn("%s: Supports only 64-bit MSIs\n",
pci_name(pdev));
@@ -94,22 +94,23 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return 0;
}
-static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
+void pnv_teardown_msi_irqs(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
struct msi_desc *entry;
+ irq_hw_number_t hwirq;
if (WARN_ON(!phb))
return;
- list_for_each_entry(entry, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(entry, pdev) {
if (entry->irq == NO_IRQ)
continue;
+ hwirq = virq_to_hw(entry->irq);
irq_set_msi_desc(entry->irq, NULL);
- msi_bitmap_free_hwirqs(&phb->msi_bmp,
- virq_to_hw(entry->irq) - phb->msi_base, 1);
irq_dispose_mapping(entry->irq);
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
}
}
#endif /* CONFIG_PCI_MSI */
@@ -572,80 +573,158 @@ struct pci_ops pnv_pci_ops = {
.write = pnv_pci_write_config,
};
-static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr, enum dma_data_direction direction,
- struct dma_attrs *attrs, bool rm)
+/*
+ * Return a pointer to the TCE slot for the table-relative index @idx.
+ *
+ * Starts at tbl->it_base and descends tbl->it_indirect_levels levels.
+ * At each level the bits of @idx selected by the current mask pick an
+ * entry; its value, with the read/write permission bits stripped, is
+ * turned into a kernel virtual address with __va() and becomes the next
+ * table. With zero indirect levels this is simply it_base + idx.
+ *
+ * NOTE(review): intermediate entries are followed without checking that
+ * they are populated - confirm callers guarantee this.
+ */
+static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
{
- u64 proto_tce;
- __be64 *tcep, *tces;
- u64 rpn;
+ __be64 *tmp = ((__be64 *)tbl->it_base);
+ int level = tbl->it_indirect_levels;
+ /* Number of index bits consumed per level */
+ const long shift = ilog2(tbl->it_level_size);
+ /* Selects the index bits belonging to the topmost level */
+ unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+ while (level) {
+ int n = (idx & mask) >> (level * shift);
+ unsigned long tce = be64_to_cpu(tmp[n]);
+
+ /* Strip the permission bits to get the next table's address */
+ tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+ /* Drop the bits just used and move the mask one level down */
+ idx &= ~mask;
+ mask >>= shift;
+ --level;
+ }
- proto_tce = TCE_PCI_READ; // Read allowed
+ /* idx now holds only the offset within the last-level table */
+ return tmp + idx;
+}
- if (direction != DMA_TO_DEVICE)
- proto_tce |= TCE_PCI_WRITE;
+/*
+ * Fill @npages consecutive TCEs, starting at IOMMU index @index, so that
+ * they map the physically contiguous pages beginning at kernel address
+ * @uaddr. Permissions are derived from @direction; @attrs is unused.
+ * Always returns 0.
+ */
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	u64 perm = iommu_direction_to_tce_perm(direction);
+	const u64 first_rpn = __pa(uaddr) >> tbl->it_page_shift;
+	const unsigned long first = index - tbl->it_offset;
+	long n;
- tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
- rpn = __pa(uaddr) >> tbl->it_page_shift;
+	/* A writable entry is always made readable as well */
+	if (perm & TCE_PCI_WRITE)
+		perm |= TCE_PCI_READ;
- while (npages--)
- *(tcep++) = cpu_to_be64(proto_tce |
- (rpn++ << tbl->it_page_shift));
+	for (n = 0; n < npages; n++) {
+		unsigned long entry = perm |
+			((first_rpn + n) << tbl->it_page_shift);
+
- /* Some implementations won't cache invalid TCEs and thus may not
- * need that flush. We'll probably turn it_type into a bit mask
- * of flags if that becomes the case
- */
- if (tbl->it_type & TCE_PCI_SWINV_CREATE)
- pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+		*(pnv_tce(tbl, first + n)) = cpu_to_be64(entry);
+	}
return 0;
}
-static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs)
+#ifdef CONFIG_IOMMU_API
+/*
+ * Atomically replace the TCE at IOMMU index @index and report what was
+ * there before.
+ *
+ * @hpa:       in: host physical address for the new entry (must be aligned
+ *             to the table's IOMMU page size, otherwise BUG);
+ *             out: address held by the previous entry.
+ * @direction: in: DMA direction for the new entry;
+ *             out: direction encoded in the previous entry.
+ *
+ * Always returns 0.
+ */
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
{
- return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
- false);
+	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+	unsigned long newtce = *hpa | proto_tce, oldtce;
+	unsigned long idx = index - tbl->it_offset;
+
+	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+	/* A writable entry is always made readable as well */
+	if (newtce & TCE_PCI_WRITE)
+		newtce |= TCE_PCI_READ;
+
+	/*
+	 * TCEs are stored big-endian. Convert the old entry to CPU byte
+	 * order once, so that both the address and the permission bits
+	 * below are decoded from the same, correctly ordered value.
+	 */
+	oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
+	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+	*direction = iommu_tce_direction(oldtce);
+
+	return 0;
}
+#endif
-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
- bool rm)
+/* Zero @npages consecutive TCEs starting at IOMMU index @index. */
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+	const unsigned long first = index - tbl->it_offset;
+	long n;
- tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
+	for (n = 0; n < npages; n++)
+		*(pnv_tce(tbl, first + n)) = cpu_to_be64(0);
- while (npages--)
- *(tcep++) = cpu_to_be64(0);
+}
- if (tbl->it_type & TCE_PCI_SWINV_FREE)
- pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+/*
+ * Read the TCE at IOMMU index @index and return it in CPU byte order.
+ * Entries are stored big-endian (see the cpu_to_be64() on every store),
+ * so the raw word must be converted before callers test its bits.
+ */
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+	return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
}
-static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+/*
+ * Allocate a zeroed iommu_table on NUMA node @nid and initialise its
+ * list of attached table groups. Returns NULL if the allocation fails.
+ */
+struct iommu_table *pnv_pci_table_alloc(int nid)
{
- pnv_tce_free(tbl, index, npages, false);
+	struct iommu_table *tbl;
+
+	tbl = kzalloc_node(sizeof(*tbl), GFP_KERNEL, nid);
+	/* Do not touch the list head of a table we failed to allocate */
+	if (!tbl)
+		return NULL;
+
+	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+
+	return tbl;
}
-static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+/*
+ * Attach @tbl to @table_group: a link object allocated on @node is added
+ * to the table's list of groups, and the table is stored in slot @num of
+ * the group. Returns 0, -EINVAL (with a warning) when either pointer is
+ * NULL, or -ENOMEM when the link cannot be allocated.
+ */
+long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
{
- return ((u64 *)tbl->it_base)[index - tbl->it_offset];
+	struct iommu_table_group_link *link;
+
+	if (WARN_ON(!tbl || !table_group))
+		return -EINVAL;
+
+	link = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
+			node);
+	if (!link)
+		return -ENOMEM;
+
+	/* table -> group direction */
+	link->table_group = table_group;
+	list_add_rcu(&link->next, &tbl->it_group_list);
+
+	/* group -> table direction */
+	table_group->tables[num] = tbl;
+
+	return 0;
}
-static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs)
+/* RCU callback: free the iommu_table_group_link that embeds @head. */
+static void pnv_iommu_table_group_link_free(struct rcu_head *head)
{
- return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
+	kfree(container_of(head, struct iommu_table_group_link, rcu));
}
-static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
+/*
+ * Undo pnv_pci_link_table_and_group(): remove the link to @table_group
+ * from the table's group list (the link is freed via call_rcu()) and
+ * clear the group's slot that points at @tbl. Does nothing when either
+ * pointer is NULL; warns when the link or the slot cannot be found.
+ */
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
{
- pnv_tce_free(tbl, index, npages, true);
+	struct iommu_table_group_link *link;
+	bool unlinked = false;
+	bool cleared = false;
+	long slot;
+
+	if (!tbl || !table_group)
+		return;
+
+	/* Drop the table's link to this group */
+	list_for_each_entry_rcu(link, &tbl->it_group_list, next) {
+		if (link->table_group != table_group)
+			continue;
+
+		list_del_rcu(&link->next);
+		call_rcu(&link->rcu, pnv_iommu_table_group_link_free);
+		unlinked = true;
+		break;
+	}
+	if (WARN_ON(!unlinked))
+		return;
+
+	/* Make the group forget the table */
+	for (slot = 0; slot < IOMMU_TABLE_GROUP_MAX_TABLES; ++slot) {
+		if (table_group->tables[slot] != tbl)
+			continue;
+
+		table_group->tables[slot] = NULL;
+		cleared = true;
+		break;
+	}
+	WARN_ON(!cleared);
}
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
@@ -662,7 +741,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl->it_type = TCE_PCI;
}
-static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
+void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
struct pnv_phb *phb = hose->private_data;
@@ -689,37 +768,33 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
phb->dma_dev_setup(phb, pdev);
}
-int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+/*
+ * Point the matching bus-type PE of this PHB at @bus.
+ *
+ * Walks phb->ioda.pe_list looking for the first PE that is flagged
+ * PNV_IODA_PE_BUS or PNV_IODA_PE_BUS_ALL, already has a non-NULL pbus,
+ * and whose RID bits 15:8 equal bus->number; that PE's pbus is then set
+ * to @bus. Nothing happens if no PE matches.
+ */
+void pnv_pci_dma_bus_setup(struct pci_bus *bus)
{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ struct pci_controller *hose = bus->sysdata;
struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
- if (phb && phb->dma_set_mask)
- return phb->dma_set_mask(phb, pdev, dma_mask);
- return __dma_set_mask(&pdev->dev, dma_mask);
-}
-
-u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
-{
- struct pci_controller *hose = pci_bus_to_host(pdev->bus);
- struct pnv_phb *phb = hose->private_data;
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ /* Only PEs that cover a bus (or a bus and its subordinates) */
+ if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)))
+ continue;
- if (phb && phb->dma_get_required_mask)
- return phb->dma_get_required_mask(phb, pdev);
+ if (!pe->pbus)
+ continue;
- return __dma_get_required_mask(&pdev->dev);
+ /* Compare against the bus-number byte of the PE's RID */
+ if (bus->number == ((pe->rid >> 8) & 0xFF)) {
+ pe->pbus = bus;
+ break;
+ }
+ }
}
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
- list_for_each_entry(hose, &hose_list, list_node) {
- struct pnv_phb *phb = hose->private_data;
-
- if (phb && phb->shutdown)
- phb->shutdown(phb);
- }
+	/* Run the shutdown hook of every host controller that provides one */
+	list_for_each_entry(hose, &hose_list, list_node) {
+		if (!hose->controller_ops.shutdown)
+			continue;
+
+		hose->controller_ops.shutdown(hose);
+	}
}
/* Fixup wrong class code in p7ioc and p8 root complex */
@@ -762,22 +837,7 @@ void __init pnv_pci_init(void)
pci_devs_phb_init();
/* Configure IOMMU DMA hooks */
- ppc_md.tce_build = pnv_tce_build_vm;
- ppc_md.tce_free = pnv_tce_free_vm;
- ppc_md.tce_build_rm = pnv_tce_build_rm;
- ppc_md.tce_free_rm = pnv_tce_free_rm;
- ppc_md.tce_get = pnv_tce_get;
set_pci_dma_ops(&dma_iommu_ops);
-
- /* Configure MSIs */
-#ifdef CONFIG_PCI_MSI
- ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
- ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
-#endif
}
machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
-
-struct pci_controller_ops pnv_pci_controller_ops = {
- .dma_dev_setup = pnv_pci_dma_dev_setup,
-};
diff --git a/kernel/arch/powerpc/platforms/powernv/pci.h b/kernel/arch/powerpc/platforms/powernv/pci.h
index 070ee888f..36a99feab 100644
--- a/kernel/arch/powerpc/platforms/powernv/pci.h
+++ b/kernel/arch/powerpc/platforms/powernv/pci.h
@@ -57,8 +57,7 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
int tce32_seg;
int tce32_segcount;
- struct iommu_table *tce32_table;
- phys_addr_t tce_inval_reg_phys;
+ struct iommu_table_group table_group;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -106,16 +105,12 @@ struct pnv_phb {
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
- int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
- u64 dma_mask);
- u64 (*dma_get_required_mask)(struct pnv_phb *phb,
- struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
- void (*shutdown)(struct pnv_phb *phb);
int (*init_m64)(struct pnv_phb *phb);
- void (*reserve_m64_pe)(struct pnv_phb *phb);
- int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
+ void (*reserve_m64_pe)(struct pci_bus *bus,
+ unsigned long *pe_bitmap, bool all);
+ int (*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
@@ -123,6 +118,7 @@ struct pnv_phb {
union {
struct {
struct iommu_table iommu_table;
+ struct iommu_table_group table_group;
} p5ioc2;
struct {
@@ -186,6 +182,12 @@ struct pnv_phb {
* boot for resource allocation purposes
*/
struct list_head pe_dma_list;
+
+ /* TCE cache invalidate registers (physical and
+ * remapped)
+ */
+ phys_addr_t tce_inval_reg_phys;
+ __be64 __iomem *tce_inval_reg;
} ioda;
};
@@ -200,6 +202,13 @@ struct pnv_phb {
};
extern struct pci_ops pnv_pci_ops;
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ struct dma_attrs *attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
unsigned char *log_buff);
@@ -207,6 +216,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn,
int where, int size, u32 *val);
int pnv_pci_cfg_write(struct pci_dn *pdn,
int where, int size, u32 val);
+extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
void *tce_mem, u64 tce_size,
u64 dma_offset, unsigned page_shift);
@@ -218,4 +234,9 @@ extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
+extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
+extern void pnv_pci_dma_bus_setup(struct pci_bus *bus);
+extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
+extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
+
#endif /* __POWERNV_PCI_H */
diff --git a/kernel/arch/powerpc/platforms/powernv/powernv.h b/kernel/arch/powerpc/platforms/powernv/powernv.h
index 826d2c9be..6dbc0a1da 100644
--- a/kernel/arch/powerpc/platforms/powernv/powernv.h
+++ b/kernel/arch/powerpc/platforms/powernv/powernv.h
@@ -12,29 +12,18 @@ struct pci_dev;
#ifdef CONFIG_PCI
extern void pnv_pci_init(void);
extern void pnv_pci_shutdown(void);
-extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
-extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev);
#else
static inline void pnv_pci_init(void) { }
static inline void pnv_pci_shutdown(void) { }
-
-static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
-{
- return -ENODEV;
-}
-
-static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
-{
- return 0;
-}
#endif
-extern struct pci_controller_ops pnv_pci_controller_ops;
-
extern u32 pnv_get_supported_cpuidle_states(void);
extern void pnv_lpc_init(void);
+extern void opal_handle_events(uint64_t events);
+extern void opal_event_shutdown(void);
+
bool cpu_core_split_required(void);
#endif /* _POWERNV_H */
diff --git a/kernel/arch/powerpc/platforms/powernv/rng.c b/kernel/arch/powerpc/platforms/powernv/rng.c
index 6eb808ff6..5dcbdea1a 100644
--- a/kernel/arch/powerpc/platforms/powernv/rng.c
+++ b/kernel/arch/powerpc/platforms/powernv/rng.c
@@ -128,7 +128,7 @@ static __init int rng_create(struct device_node *dn)
pr_info_once("Registering arch random hook.\n");
- ppc_md.get_random_long = powernv_get_random_long;
+ ppc_md.get_random_seed = powernv_get_random_long;
return 0;
}
diff --git a/kernel/arch/powerpc/platforms/powernv/setup.c b/kernel/arch/powerpc/platforms/powernv/setup.c
index 16fdcb23f..a9a8fa37a 100644
--- a/kernel/arch/powerpc/platforms/powernv/setup.c
+++ b/kernel/arch/powerpc/platforms/powernv/setup.c
@@ -35,12 +35,8 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
-#include <asm/cpuidle.h>
-#include <asm/code-patching.h>
#include "powernv.h"
-#include "subcore.h"
static void __init pnv_setup_arch(void)
{
@@ -111,7 +107,7 @@ static void pnv_prepare_going_down(void)
* Disable all notifiers from OPAL, we can't
* service interrupts anymore anyway
*/
- opal_notifier_disable();
+ opal_event_shutdown();
/* Soft disable interrupts */
local_irq_disable();
@@ -169,21 +165,6 @@ static void pnv_progress(char *s, unsigned short hex)
{
}
-static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (dev_is_pci(dev))
- return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
- return __dma_set_mask(dev, dma_mask);
-}
-
-static u64 pnv_dma_get_required_mask(struct device *dev)
-{
- if (dev_is_pci(dev))
- return pnv_pci_dma_get_required_mask(to_pci_dev(dev));
-
- return __dma_get_required_mask(dev);
-}
-
static void pnv_shutdown(void)
{
/* Let the PCI code clear up IODA tables */
@@ -206,7 +187,7 @@ static void pnv_kexec_wait_secondaries_down(void)
for_each_online_cpu(i) {
uint8_t status;
- int64_t rc;
+ int64_t rc, timeout = 1000;
if (i == my_cpu)
continue;
@@ -223,6 +204,18 @@ static void pnv_kexec_wait_secondaries_down(void)
i, paca[i].hw_cpu_id);
notified = i;
}
+
+ /*
+ * On crash, secondaries might be unreachable or hung,
+ * so time out if we've waited too long.
+ */
+ mdelay(1);
+ if (timeout-- == 0) {
+ printk(KERN_ERR "kexec: timed out waiting for "
+ "cpu %d (physical %d) to enter OPAL\n",
+ i, paca[i].hw_cpu_id);
+ break;
+ }
}
}
}
@@ -244,16 +237,16 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
/* Return the CPU to OPAL */
opal_return_cpu();
- } else if (crash_shutdown) {
- /*
- * On crash, we don't wait for secondaries to go
- * down as they might be unreachable or hung, so
- * instead we just wait a bit and move on.
- */
- mdelay(1);
} else {
/* Primary waits for the secondaries to have reached OPAL */
pnv_kexec_wait_secondaries_down();
+
+ /*
+ * We might be running as little-endian - now that interrupts
+ * are disabled, reset the HILE bit to big-endian so we don't
+ * take interrupts in the wrong endian later
+ */
+ opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
}
}
#endif /* CONFIG_KEXEC */
@@ -277,173 +270,6 @@ static void __init pnv_setup_machdep_opal(void)
ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
-static u32 supported_cpuidle_states;
-
-int pnv_save_sprs_for_winkle(void)
-{
- int cpu;
- int rc;
-
- /*
- * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
- * all cpus at boot. Get these reg values of current cpu and use the
- * same accross all cpus.
- */
- uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
- uint64_t hid0_val = mfspr(SPRN_HID0);
- uint64_t hid1_val = mfspr(SPRN_HID1);
- uint64_t hid4_val = mfspr(SPRN_HID4);
- uint64_t hid5_val = mfspr(SPRN_HID5);
- uint64_t hmeer_val = mfspr(SPRN_HMEER);
-
- for_each_possible_cpu(cpu) {
- uint64_t pir = get_hard_smp_processor_id(cpu);
- uint64_t hsprg0_val = (uint64_t)&paca[cpu];
-
- /*
- * HSPRG0 is used to store the cpu's pointer to paca. Hence last
- * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
- * with 63rd bit set, so that when a thread wakes up at 0x100 we
- * can use this bit to distinguish between fastsleep and
- * deep winkle.
- */
- hsprg0_val |= 1;
-
- rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
- if (rc != 0)
- return rc;
-
- rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
- if (rc != 0)
- return rc;
-
- /* HIDs are per core registers */
- if (cpu_thread_in_core(cpu) == 0) {
-
- rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
- if (rc != 0)
- return rc;
-
- rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
- if (rc != 0)
- return rc;
-
- rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
- if (rc != 0)
- return rc;
-
- rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
- if (rc != 0)
- return rc;
-
- rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
- if (rc != 0)
- return rc;
- }
- }
-
- return 0;
-}
-
-static void pnv_alloc_idle_core_states(void)
-{
- int i, j;
- int nr_cores = cpu_nr_cores();
- u32 *core_idle_state;
-
- /*
- * core_idle_state - First 8 bits track the idle state of each thread
- * of the core. The 8th bit is the lock bit. Initially all thread bits
- * are set. They are cleared when the thread enters deep idle state
- * like sleep and winkle. Initially the lock bit is cleared.
- * The lock bit has 2 purposes
- * a. While the first thread is restoring core state, it prevents
- * other threads in the core from switching to process context.
- * b. While the last thread in the core is saving the core state, it
- * prevents a different thread from waking up.
- */
- for (i = 0; i < nr_cores; i++) {
- int first_cpu = i * threads_per_core;
- int node = cpu_to_node(first_cpu);
-
- core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
-
- for (j = 0; j < threads_per_core; j++) {
- int cpu = first_cpu + j;
-
- paca[cpu].core_idle_state_ptr = core_idle_state;
- paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
- paca[cpu].thread_mask = 1 << j;
- }
- }
-
- update_subcore_sibling_mask();
-
- if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
- pnv_save_sprs_for_winkle();
-}
-
-u32 pnv_get_supported_cpuidle_states(void)
-{
- return supported_cpuidle_states;
-}
-EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
-
-static int __init pnv_init_idle_states(void)
-{
- struct device_node *power_mgt;
- int dt_idle_states;
- u32 *flags;
- int i;
-
- supported_cpuidle_states = 0;
-
- if (cpuidle_disable != IDLE_NO_OVERRIDE)
- goto out;
-
- if (!firmware_has_feature(FW_FEATURE_OPALv3))
- goto out;
-
- power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
- if (!power_mgt) {
- pr_warn("opal: PowerMgmt Node not found\n");
- goto out;
- }
- dt_idle_states = of_property_count_u32_elems(power_mgt,
- "ibm,cpu-idle-state-flags");
- if (dt_idle_states < 0) {
- pr_warn("cpuidle-powernv: no idle states found in the DT\n");
- goto out;
- }
-
- flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
- if (of_property_read_u32_array(power_mgt,
- "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
- pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
- goto out_free;
- }
-
- for (i = 0; i < dt_idle_states; i++)
- supported_cpuidle_states |= flags[i];
-
- if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- }
- pnv_alloc_idle_core_states();
-out_free:
- kfree(flags);
-out:
- return 0;
-}
-
-subsys_initcall(pnv_init_idle_states);
-
static int __init pnv_probe(void)
{
unsigned long root = of_get_flat_dt_root();
@@ -492,8 +318,6 @@ define_machine(powernv) {
.machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
.calibrate_decr = generic_calibrate_decr,
- .dma_set_mask = pnv_dma_set_mask,
- .dma_get_required_mask = pnv_dma_get_required_mask,
#ifdef CONFIG_KEXEC
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif
diff --git a/kernel/arch/powerpc/platforms/powernv/smp.c b/kernel/arch/powerpc/platforms/powernv/smp.c
index 8f70ba681..ca264833e 100644
--- a/kernel/arch/powerpc/platforms/powernv/smp.c
+++ b/kernel/arch/powerpc/platforms/powernv/smp.c
@@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void)
* so clear LPCR:PECE1. We keep PECE2 enabled.
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
+
+ /*
+ * Hard-disable interrupts, and then clear irq_happened flags
+ * that we can safely ignore while off-line, since they
+ * are for things for which we do no processing when off-line
+ * (or in the case of HMI, all the processing we need to do
+ * is done in lower-level real-mode code).
+ */
+ hard_irq_disable();
+ local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
+
while (!generic_check_cpu_restart(cpu)) {
+ /*
+ * Clear IPI flag, since we don't handle IPIs while
+ * offline, except for those when changing micro-threading
+ * mode, which are handled explicitly below, and those
+ * for coming online, which are handled via
+ * generic_check_cpu_restart() calls.
+ */
+ kvmppc_set_host_ipi(cpu, 0);
ppc64_runlatch_off();
@@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void)
* having finished executing in a KVM guest, then srr1
* contains 0.
*/
- if ((srr1 & wmask) == SRR1_WAKEEE) {
+ if (((srr1 & wmask) == SRR1_WAKEEE) ||
+ (local_paca->irq_happened & PACA_IRQ_EE)) {
icp_native_flush_interrupt();
- local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
- smp_mb();
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
- kvmppc_set_host_ipi(cpu, 0);
}
+ local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
+ smp_mb();
if (cpu_core_split_required())
continue;
- if (!generic_check_cpu_restart(cpu))
+ if (srr1 && !generic_check_cpu_restart(cpu))
DBG("CPU%d Unexpected exit while offline !\n", cpu);
}
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
diff --git a/kernel/arch/powerpc/platforms/powernv/subcore.c b/kernel/arch/powerpc/platforms/powernv/subcore.c
index f60f80ada..503a73f59 100644
--- a/kernel/arch/powerpc/platforms/powernv/subcore.c
+++ b/kernel/arch/powerpc/platforms/powernv/subcore.c
@@ -190,7 +190,7 @@ static void unsplit_core(void)
hid0 = mfspr(SPRN_HID0);
hid0 &= ~HID0_POWER8_DYNLPARDIS;
- mtspr(SPRN_HID0, hid0);
+ update_power8_hid0(hid0);
update_hid_in_slw(hid0);
while (mfspr(SPRN_HID0) & mask)
@@ -227,7 +227,7 @@ static void split_core(int new_mode)
/* Write new mode */
hid0 = mfspr(SPRN_HID0);
hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
- mtspr(SPRN_HID0, hid0);
+ update_power8_hid0(hid0);
update_hid_in_slw(hid0);
/* Wait for it to happen */
diff --git a/kernel/arch/powerpc/platforms/ps3/Kconfig b/kernel/arch/powerpc/platforms/ps3/Kconfig
index 56f274064..b27f40f26 100644
--- a/kernel/arch/powerpc/platforms/ps3/Kconfig
+++ b/kernel/arch/powerpc/platforms/ps3/Kconfig
@@ -1,6 +1,6 @@
config PPC_PS3
bool "Sony PS3"
- depends on PPC64 && PPC_BOOK3S
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
select PPC_CELL
select USB_OHCI_LITTLE_ENDIAN
select USB_OHCI_BIG_ENDIAN_MMIO
diff --git a/kernel/arch/powerpc/platforms/ps3/interrupt.c b/kernel/arch/powerpc/platforms/ps3/interrupt.c
index a6c42f343..638c40609 100644
--- a/kernel/arch/powerpc/platforms/ps3/interrupt.c
+++ b/kernel/arch/powerpc/platforms/ps3/interrupt.c
@@ -678,7 +678,8 @@ static int ps3_host_map(struct irq_domain *h, unsigned int virq,
return 0;
}
-static int ps3_host_match(struct irq_domain *h, struct device_node *np)
+static int ps3_host_match(struct irq_domain *h, struct device_node *np,
+ enum irq_domain_bus_token bus_token)
{
/* Match all */
return 1;
diff --git a/kernel/arch/powerpc/platforms/ps3/os-area.c b/kernel/arch/powerpc/platforms/ps3/os-area.c
index 097871398..3db53e8af 100644
--- a/kernel/arch/powerpc/platforms/ps3/os-area.c
+++ b/kernel/arch/powerpc/platforms/ps3/os-area.c
@@ -194,11 +194,6 @@ static const struct os_area_db_id os_area_db_id_rtc_diff = {
.key = OS_AREA_DB_KEY_RTC_DIFF
};
-static const struct os_area_db_id os_area_db_id_video_mode = {
- .owner = OS_AREA_DB_OWNER_LINUX,
- .key = OS_AREA_DB_KEY_VIDEO_MODE
-};
-
#define SECONDS_FROM_1970_TO_2000 946684800LL
/**
diff --git a/kernel/arch/powerpc/platforms/ps3/time.c b/kernel/arch/powerpc/platforms/ps3/time.c
index ce73ce865..791c6142c 100644
--- a/kernel/arch/powerpc/platforms/ps3/time.c
+++ b/kernel/arch/powerpc/platforms/ps3/time.c
@@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void)
return PTR_ERR_OR_ZERO(pdev);
}
-
-module_init(ps3_rtc_init);
+device_initcall(ps3_rtc_init);
diff --git a/kernel/arch/powerpc/platforms/pseries/Kconfig b/kernel/arch/powerpc/platforms/pseries/Kconfig
index 54c87d5d3..bec90fb30 100644
--- a/kernel/arch/powerpc/platforms/pseries/Kconfig
+++ b/kernel/arch/powerpc/platforms/pseries/Kconfig
@@ -4,6 +4,7 @@ config PPC_PSERIES
select HAVE_PCSPKR_PLATFORM
select MPIC
select OF_DYNAMIC
+ select PCI
select PCI_MSI
select PPC_XICS
select PPC_ICP_NATIVE
@@ -15,7 +16,6 @@ config PPC_PSERIES
select RTAS_ERROR_LOGGING
select PPC_UDBG_16550
select PPC_NATIVE
- select PPC_PCI_CHOICE if EXPERT
select PPC_DOORBELL
select HAVE_CONTEXT_TRACKING
select HOTPLUG_CPU if SMP
@@ -43,11 +43,6 @@ config DTL
Say N if you are unsure.
-config PSERIES_MSI
- bool
- depends on PCI_MSI && PPC_PSERIES && EEH
- default y
-
config PSERIES_ENERGY
tristate "pSeries energy management capabilities driver"
depends on PPC_PSERIES
diff --git a/kernel/arch/powerpc/platforms/pseries/Makefile b/kernel/arch/powerpc/platforms/pseries/Makefile
index 03480796a..fedc2ccf0 100644
--- a/kernel/arch/powerpc/platforms/pseries/Makefile
+++ b/kernel/arch/powerpc/platforms/pseries/Makefile
@@ -2,14 +2,13 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
obj-y := lpar.o hvCall.o nvram.o reconfig.o \
+ of_helpers.o \
setup.o iommu.o event_sources.o ras.o \
- firmware.o power.o dlpar.o mobility.o rng.o
+ firmware.o power.o dlpar.o mobility.o rng.o \
+ pci.o pci_dlpar.o eeh_pseries.o msi.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCANLOG) += scanlog.o
-obj-$(CONFIG_EEH) += eeh_pseries.o
obj-$(CONFIG_KEXEC) += kexec.o
-obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
-obj-$(CONFIG_PSERIES_MSI) += msi.o
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
diff --git a/kernel/arch/powerpc/platforms/pseries/dlpar.c b/kernel/arch/powerpc/platforms/pseries/dlpar.c
index 019d34aaf..f244dcb4f 100644
--- a/kernel/arch/powerpc/platforms/pseries/dlpar.c
+++ b/kernel/arch/powerpc/platforms/pseries/dlpar.c
@@ -18,6 +18,8 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/of.h>
+
+#include "of_helpers.h"
#include "offline_states.h"
#include "pseries.h"
@@ -244,36 +246,13 @@ cc_error:
return first_dn;
}
-static struct device_node *derive_parent(const char *path)
-{
- struct device_node *parent;
- char *last_slash;
-
- last_slash = strrchr(path, '/');
- if (last_slash == path) {
- parent = of_find_node_by_path("/");
- } else {
- char *parent_path;
- int parent_path_len = last_slash - path + 1;
- parent_path = kmalloc(parent_path_len, GFP_KERNEL);
- if (!parent_path)
- return NULL;
-
- strlcpy(parent_path, path, parent_path_len);
- parent = of_find_node_by_path(parent_path);
- kfree(parent_path);
- }
-
- return parent;
-}
-
int dlpar_attach_node(struct device_node *dn)
{
int rc;
- dn->parent = derive_parent(dn->full_name);
- if (!dn->parent)
- return -ENOMEM;
+ dn->parent = pseries_of_derive_parent(dn->full_name);
+ if (IS_ERR(dn->parent))
+ return PTR_ERR(dn->parent);
rc = of_attach_node(dn);
if (rc) {
@@ -421,10 +400,11 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
return -ENODEV;
dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
- if (!dn)
- return -EINVAL;
-
of_node_put(parent);
+ if (!dn) {
+ dlpar_release_drc(drc_index);
+ return -EINVAL;
+ }
rc = dlpar_attach_node(dn);
if (rc) {
diff --git a/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c b/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c
index 2039397cc..ac3ffd97e 100644
--- a/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -433,42 +433,34 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
return ret;
/* Parse the result out */
- result = 0;
- if (rets[1]) {
- switch(rets[0]) {
- case 0:
- result &= ~EEH_STATE_RESET_ACTIVE;
- result |= EEH_STATE_MMIO_ACTIVE;
- result |= EEH_STATE_DMA_ACTIVE;
- break;
- case 1:
- result |= EEH_STATE_RESET_ACTIVE;
- result |= EEH_STATE_MMIO_ACTIVE;
- result |= EEH_STATE_DMA_ACTIVE;
- break;
- case 2:
- result &= ~EEH_STATE_RESET_ACTIVE;
- result &= ~EEH_STATE_MMIO_ACTIVE;
- result &= ~EEH_STATE_DMA_ACTIVE;
- break;
- case 4:
- result &= ~EEH_STATE_RESET_ACTIVE;
- result &= ~EEH_STATE_MMIO_ACTIVE;
- result &= ~EEH_STATE_DMA_ACTIVE;
- result |= EEH_STATE_MMIO_ENABLED;
- break;
- case 5:
- if (rets[2]) {
- if (state) *state = rets[2];
- result = EEH_STATE_UNAVAILABLE;
- } else {
- result = EEH_STATE_NOT_SUPPORT;
- }
- break;
- default:
+ if (!rets[1])
+ return EEH_STATE_NOT_SUPPORT;
+
+ switch(rets[0]) {
+ case 0:
+ result = EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE;
+ break;
+ case 1:
+ result = EEH_STATE_RESET_ACTIVE |
+ EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE;
+ break;
+ case 2:
+ result = 0;
+ break;
+ case 4:
+ result = EEH_STATE_MMIO_ENABLED;
+ break;
+ case 5:
+ if (rets[2]) {
+ if (state) *state = rets[2];
+ result = EEH_STATE_UNAVAILABLE;
+ } else {
result = EEH_STATE_NOT_SUPPORT;
}
- } else {
+ break;
+ default:
result = EEH_STATE_NOT_SUPPORT;
}
@@ -519,7 +511,7 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
/**
* pseries_eeh_wait_state - Wait for PE state
* @pe: EEH PE
- * @max_wait: maximal period in microsecond
+ * @max_wait: maximal period in millisecond
*
* Wait for the state of associated PE. It might take some time
* to retrieve the PE's state.
diff --git a/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c b/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c
index 0ced387e1..e9ff44cd5 100644
--- a/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -92,13 +92,12 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
return NULL;
new_prop->name = kstrdup(prop->name, GFP_KERNEL);
- new_prop->value = kmalloc(prop->length, GFP_KERNEL);
+ new_prop->value = kmemdup(prop->value, prop->length, GFP_KERNEL);
if (!new_prop->name || !new_prop->value) {
dlpar_free_drconf_property(new_prop);
return NULL;
}
- memcpy(new_prop->value, prop->value, prop->length);
new_prop->length = prop->length;
/* Convert the property to cpu endian-ness */
diff --git a/kernel/arch/powerpc/platforms/pseries/hvcserver.c b/kernel/arch/powerpc/platforms/pseries/hvcserver.c
index eedb64594..94a6e5612 100644
--- a/kernel/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/kernel/arch/powerpc/platforms/pseries/hvcserver.c
@@ -142,11 +142,11 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
int more = 1;
int retval;
- memset(pi_buff, 0x00, PAGE_SIZE);
/* invalid parameters */
if (!head || !pi_buff)
return -EINVAL;
+ memset(pi_buff, 0x00, PAGE_SIZE);
last_p_partition_ID = last_p_unit_address = ~0UL;
INIT_LIST_HEAD(head);
diff --git a/kernel/arch/powerpc/platforms/pseries/iommu.c b/kernel/arch/powerpc/platforms/pseries/iommu.c
index 61d5a17f4..bd98ce2be 100644
--- a/kernel/arch/powerpc/platforms/pseries/iommu.c
+++ b/kernel/arch/powerpc/platforms/pseries/iommu.c
@@ -36,6 +36,8 @@
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -51,6 +53,73 @@
#include "pseries.h"
+/*
+ * Allocate a zeroed table group on @node, with one iommu_table in tables[0]
+ * and a link back to the group on its it_group_list. NULL on allocation failure.
+ */
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+	struct iommu_table_group_link *link = NULL;
+	struct iommu_table *table = NULL;
+	struct iommu_table_group *grp;
+
+	grp = kzalloc_node(sizeof(*grp), GFP_KERNEL, node);
+	if (!grp)
+		return NULL;
+
+	table = kzalloc_node(sizeof(*table), GFP_KERNEL, node);
+	if (!table)
+		goto err_free;
+
+	link = kzalloc_node(sizeof(*link), GFP_KERNEL, node);
+	if (!link)
+		goto err_free;
+
+	link->table_group = grp;
+	INIT_LIST_HEAD_RCU(&table->it_group_list);
+	list_add_rcu(&link->next, &table->it_group_list);
+	grp->tables[0] = table;
+
+	return grp;
+
+err_free:
+	kfree(link);
+	kfree(table);
+	kfree(grp);
+	return NULL;
+}
+
+/*
+ * Free @table_group and its tables[0]; with CONFIG_IOMMU_API also unlink the
+ * table's first group link and drop the iommu_group. NULL group is a no-op.
+ */
+static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+		const char *node_name)
+{
+	struct iommu_table *table;
+#ifdef CONFIG_IOMMU_API
+	struct iommu_table_group_link *link;
+#endif
+
+	if (!table_group)
+		return;
+	table = table_group->tables[0];
+#ifdef CONFIG_IOMMU_API
+	link = list_first_entry_or_null(&table->it_group_list,
+			struct iommu_table_group_link, next);
+	if (!WARN_ON_ONCE(!link)) {
+		list_del_rcu(&link->next);
+		kfree(link);
+	}
+	if (table_group->group) {
+		iommu_group_put(table_group->group);
+		BUG_ON(table_group->group);
+	}
+#endif
+	iommu_free_table(table, node_name);
+	kfree(table_group);
+}
+
static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
__be64 *startp, __be64 *endp)
{
@@ -193,7 +262,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
int ret = 0;
unsigned long flags;
- if (npages == 1) {
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
@@ -285,6 +354,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
{
u64 rc;
+ if (!firmware_has_feature(FW_FEATURE_MULTITCE))
+ return tce_free_pSeriesLP(tbl, tcenum, npages);
+
rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
if (rc && printk_ratelimit()) {
@@ -460,8 +532,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}
-
-#ifdef CONFIG_PCI
static void iommu_table_setparms(struct pci_controller *phb,
struct device_node *dn,
struct iommu_table *tbl)
@@ -546,6 +616,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
tbl->it_size = size >> tbl->it_page_shift;
}
+struct iommu_table_ops iommu_table_pseries_ops = {
+	.set	= tce_build_pSeries,
+	.clear	= tce_free_pSeries,
+	.get	= tce_get_pseries,	/* sic: helper is spelled lower-case */
+};
+
static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
struct device_node *dn;
@@ -610,12 +686,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
pci->phb->dma_window_size = 0x8000000ul;
pci->phb->dma_window_base_cur = 0x8000000ul;
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- pci->phb->node);
+ pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+ tbl = pci->table_group->tables[0];
iommu_table_setparms(pci->phb, dn, tbl);
- pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(bus), 0);
+ tbl->it_ops = &iommu_table_pseries_ops;
+ iommu_init_table(tbl, pci->phb->node);
+ iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -625,6 +702,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}
+struct iommu_table_ops iommu_table_lpar_multi_ops = {
+	.set	= tce_buildmulti_pSeriesLP, /* tce_build_pSeriesLP w/o MULTITCE */
+	.clear	= tce_freemulti_pSeriesLP,  /* tce_free_pSeriesLP w/o MULTITCE */
+	.get	= tce_get_pSeriesLP,
+};
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
@@ -653,15 +735,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
ppci = PCI_DN(pdn);
pr_debug(" parent is %s, iommu_table: 0x%p\n",
- pdn->full_name, ppci->iommu_table);
+ pdn->full_name, ppci->table_group);
- if (!ppci->iommu_table) {
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- ppci->phb->node);
+ if (!ppci->table_group) {
+ ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+ tbl = ppci->table_group->tables[0];
iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
- ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(bus), 0);
- pr_debug(" created table: %p\n", ppci->iommu_table);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ iommu_init_table(tbl, ppci->phb->node);
+ iommu_register_group(ppci->table_group,
+ pci_domain_nr(bus), 0);
+ pr_debug(" created table: %p\n", ppci->table_group);
}
}
@@ -683,13 +767,15 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
struct pci_controller *phb = PCI_DN(dn)->phb;
pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- phb->node);
+ PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
+ tbl = PCI_DN(dn)->table_group->tables[0];
iommu_table_setparms(phb, dn, tbl);
- PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
- iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
- set_iommu_table_base_and_group(&dev->dev,
- PCI_DN(dn)->iommu_table);
+ tbl->it_ops = &iommu_table_pseries_ops;
+ iommu_init_table(tbl, phb->node);
+ iommu_register_group(PCI_DN(dn)->table_group,
+ pci_domain_nr(phb->bus), 0);
+ set_iommu_table_base(&dev->dev, tbl);
+ iommu_add_device(&dev->dev);
return;
}
@@ -697,13 +783,14 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
* an already allocated iommu table is found and use that.
*/
- while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL)
+ while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
dn = dn->parent;
- if (dn && PCI_DN(dn))
- set_iommu_table_base_and_group(&dev->dev,
- PCI_DN(dn)->iommu_table);
- else
+ if (dn && PCI_DN(dn)) {
+ set_iommu_table_base(&dev->dev,
+ PCI_DN(dn)->table_group->tables[0]);
+ iommu_add_device(&dev->dev);
+ } else
printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
pci_name(dev));
}
@@ -1088,7 +1175,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
dn = pci_device_to_OF_node(dev);
pr_debug(" node is %s\n", dn->full_name);
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
@@ -1104,18 +1191,21 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
pr_debug(" parent is %s\n", pdn->full_name);
pci = PCI_DN(pdn);
- if (!pci->iommu_table) {
- tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
- pci->phb->node);
+ if (!pci->table_group) {
+ pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+ tbl = pci->table_group->tables[0];
iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
- pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
- pr_debug(" created table: %p\n", pci->iommu_table);
+ tbl->it_ops = &iommu_table_lpar_multi_ops;
+ iommu_init_table(tbl, pci->phb->node);
+ iommu_register_group(pci->table_group,
+ pci_domain_nr(pci->phb->bus), 0);
+ pr_debug(" created table: %p\n", pci->table_group);
} else {
- pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
+ pr_debug(" found DMA window, table: %p\n", pci->table_group);
}
- set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
+ set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
+ iommu_add_device(&dev->dev);
}
static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
@@ -1145,7 +1235,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
- for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
+ for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
pdn = pdn->parent) {
dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
if (dma_window)
@@ -1162,11 +1252,10 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
}
}
- /* fall back on iommu ops, restore table pointer with ops */
+ /* fall back on iommu ops */
if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
dev_info(dev, "Restoring 32-bit DMA via iommu\n");
set_dma_ops(dev, &dma_iommu_ops);
- pci_dma_dev_setup_pSeriesLP(pdev);
}
check_mask:
@@ -1189,7 +1278,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
dn = pci_device_to_OF_node(pdev);
/* search upwards for ibm,dma-window */
- for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table;
+ for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group;
dn = dn->parent)
if (of_get_property(dn, "ibm,dma-window", NULL))
break;
@@ -1202,15 +1291,6 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev)
return dma_iommu_ops.get_required_mask(dev);
}
-#else /* CONFIG_PCI */
-#define pci_dma_bus_setup_pSeries NULL
-#define pci_dma_dev_setup_pSeries NULL
-#define pci_dma_bus_setup_pSeriesLP NULL
-#define pci_dma_dev_setup_pSeriesLP NULL
-#define dma_set_mask_pSeriesLP NULL
-#define dma_get_required_mask_pSeriesLP NULL
-#endif /* !CONFIG_PCI */
-
static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
void *data)
{
@@ -1269,8 +1349,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
* the device node.
*/
remove_ddw(np, false);
- if (pci && pci->iommu_table)
- iommu_free_table(pci->iommu_table, np->full_name);
+ if (pci && pci->table_group)
+ iommu_pseries_free_group(pci->table_group,
+ np->full_name);
spin_lock(&direct_window_list_lock);
list_for_each_entry(window, &direct_window_list, list) {
@@ -1300,22 +1381,11 @@ void iommu_init_early_pSeries(void)
return;
if (firmware_has_feature(FW_FEATURE_LPAR)) {
- if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
- ppc_md.tce_build = tce_buildmulti_pSeriesLP;
- ppc_md.tce_free = tce_freemulti_pSeriesLP;
- } else {
- ppc_md.tce_build = tce_build_pSeriesLP;
- ppc_md.tce_free = tce_free_pSeriesLP;
- }
- ppc_md.tce_get = tce_get_pSeriesLP;
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP;
} else {
- ppc_md.tce_build = tce_build_pSeries;
- ppc_md.tce_free = tce_free_pSeries;
- ppc_md.tce_get = tce_get_pseries;
pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
}
@@ -1333,8 +1403,6 @@ static int __init disable_multitce(char *str)
firmware_has_feature(FW_FEATURE_LPAR) &&
firmware_has_feature(FW_FEATURE_MULTITCE)) {
printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
- ppc_md.tce_build = tce_build_pSeriesLP;
- ppc_md.tce_free = tce_free_pSeriesLP;
powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
}
return 1;
diff --git a/kernel/arch/powerpc/platforms/pseries/msi.c b/kernel/arch/powerpc/platforms/pseries/msi.c
index c8d24f9a6..272e9ec1a 100644
--- a/kernel/arch/powerpc/platforms/pseries/msi.c
+++ b/kernel/arch/powerpc/platforms/pseries/msi.c
@@ -18,6 +18,8 @@
#include <asm/ppc-pci.h>
#include <asm/machdep.h>
+#include "pseries.h"
+
static int query_token, change_token;
#define RTAS_QUERY_FN 0
@@ -116,7 +118,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev)
{
struct msi_desc *entry;
- list_for_each_entry(entry, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(entry, pdev) {
if (entry->irq == NO_IRQ)
continue;
@@ -348,7 +350,7 @@ static int check_msix_entries(struct pci_dev *pdev)
* So we must reject such requests. */
expected = 0;
- list_for_each_entry(entry, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(entry, pdev) {
if (entry->msi_attrib.entry_nr != expected) {
pr_debug("rtas_msi: bad MSI-X entries.\n");
return -EINVAL;
@@ -460,7 +462,7 @@ again:
}
i = 0;
- list_for_each_entry(entry, &pdev->msi_list, list) {
+ for_each_pci_msi_entry(entry, pdev) {
hwirq = rtas_query_irq_number(pdn, i++);
if (hwirq < 0) {
pr_debug("rtas_msi: error (%d) getting hwirq\n", rc);
@@ -505,6 +507,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
static int rtas_msi_init(void)
{
+ struct pci_controller *phb;
+
query_token = rtas_token("ibm,query-interrupt-source-number");
change_token = rtas_token("ibm,change-msi");
@@ -516,9 +520,15 @@ static int rtas_msi_init(void)
pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
- WARN_ON(ppc_md.setup_msi_irqs);
- ppc_md.setup_msi_irqs = rtas_setup_msi_irqs;
- ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs;
+ WARN_ON(pseries_pci_controller_ops.setup_msi_irqs);
+ pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
+ pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
+
+ list_for_each_entry(phb, &hose_list, list_node) {
+ WARN_ON(phb->controller_ops.setup_msi_irqs);
+ phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs;
+ phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs;
+ }
WARN_ON(ppc_md.pci_irq_fixup);
ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
diff --git a/kernel/arch/powerpc/platforms/pseries/of_helpers.c b/kernel/arch/powerpc/platforms/pseries/of_helpers.c
new file mode 100644
index 000000000..2798933c0
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/pseries/of_helpers.c
@@ -0,0 +1,38 @@
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include "of_helpers.h"
+
+/**
+ * pseries_of_derive_parent - basically like dirname(1)
+ * @path: the full_name of a node to be added to the tree
+ *
+ * Returns the node which should be the parent of the node
+ * described by path. E.g., for path = "/foo/bar", returns
+ * the node with full_name = "/foo". Returns ERR_PTR(-EINVAL) if path is
+ * "/" or the parent is not found, ERR_PTR(-ENOMEM) if allocation fails.
+ */
+struct device_node *pseries_of_derive_parent(const char *path)
+{
+	struct device_node *parent;
+	char *parent_path = NULL;
+	const char *slash;
+
+	/* reject if path is "/" */
+	if (!strcmp(path, "/"))
+		return ERR_PTR(-EINVAL);
+
+	/* Only a '/' past the first character gives a non-root parent. */
+	slash = strrchr(path, '/');
+	if (slash && slash > path) {
+		parent_path = kstrndup(path, slash - path, GFP_KERNEL);
+		if (!parent_path)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	parent = of_find_node_by_path(parent_path ? parent_path : "/");
+	kfree(parent_path);	/* NULL when the parent is the root */
+	return parent ? parent : ERR_PTR(-EINVAL);
+}
diff --git a/kernel/arch/powerpc/platforms/pseries/of_helpers.h b/kernel/arch/powerpc/platforms/pseries/of_helpers.h
new file mode 100644
index 000000000..bb83d39ae
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/pseries/of_helpers.h
@@ -0,0 +1,8 @@
+#ifndef _PSERIES_OF_HELPERS_H
+#define _PSERIES_OF_HELPERS_H
+
+#include <linux/of.h>
+
+struct device_node *pseries_of_derive_parent(const char *path);
+
+#endif /* _PSERIES_OF_HELPERS_H */
diff --git a/kernel/arch/powerpc/platforms/pseries/reconfig.c b/kernel/arch/powerpc/platforms/pseries/reconfig.c
index 0f319521e..7c7fcc042 100644
--- a/kernel/arch/powerpc/platforms/pseries/reconfig.c
+++ b/kernel/arch/powerpc/platforms/pseries/reconfig.c
@@ -22,37 +22,7 @@
#include <asm/uaccess.h>
#include <asm/mmu.h>
-/**
- * derive_parent - basically like dirname(1)
- * @path: the full_name of a node to be added to the tree
- *
- * Returns the node which should be the parent of the node
- * described by path. E.g., for path = "/foo/bar", returns
- * the node with full_name = "/foo".
- */
-static struct device_node *derive_parent(const char *path)
-{
- struct device_node *parent = NULL;
- char *parent_path = "/";
- size_t parent_path_len = strrchr(path, '/') - path + 1;
-
- /* reject if path is "/" */
- if (!strcmp(path, "/"))
- return ERR_PTR(-EINVAL);
-
- if (strrchr(path, '/') != path) {
- parent_path = kmalloc(parent_path_len, GFP_KERNEL);
- if (!parent_path)
- return ERR_PTR(-ENOMEM);
- strlcpy(parent_path, path, parent_path_len);
- }
- parent = of_find_node_by_path(parent_path);
- if (!parent)
- return ERR_PTR(-EINVAL);
- if (strcmp(parent_path, "/"))
- kfree(parent_path);
- return parent;
-}
+#include "of_helpers.h"
static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
{
@@ -71,7 +41,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
of_node_set_flag(np, OF_DYNAMIC);
of_node_init(np);
- np->parent = derive_parent(path);
+ np->parent = pseries_of_derive_parent(path);
if (IS_ERR(np->parent)) {
err = PTR_ERR(np->parent);
goto out_err;
diff --git a/kernel/arch/powerpc/platforms/pseries/rng.c b/kernel/arch/powerpc/platforms/pseries/rng.c
index e09608770..31ca557af 100644
--- a/kernel/arch/powerpc/platforms/pseries/rng.c
+++ b/kernel/arch/powerpc/platforms/pseries/rng.c
@@ -38,7 +38,7 @@ static __init int rng_init(void)
pr_info("Registering arch random hook.\n");
- ppc_md.get_random_long = pseries_get_random_long;
+ ppc_md.get_random_seed = pseries_get_random_long;
return 0;
}
diff --git a/kernel/arch/powerpc/platforms/pseries/setup.c b/kernel/arch/powerpc/platforms/pseries/setup.c
index e6e8b241d..36df46eab 100644
--- a/kernel/arch/powerpc/platforms/pseries/setup.c
+++ b/kernel/arch/powerpc/platforms/pseries/setup.c
@@ -40,6 +40,7 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/of.h>
+#include <linux/of_pci.h>
#include <linux/kexec.h>
#include <asm/mmu.h>
@@ -111,7 +112,7 @@ static void __init fwnmi_init(void)
fwnmi_active = 1;
}
-static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void pseries_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
@@ -254,24 +255,26 @@ static void __init pseries_discover_pic(void)
static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
struct of_reconfig_data *rd = data;
- struct device_node *np = rd->dn;
- struct pci_dn *pci = NULL;
+ struct device_node *parent, *np = rd->dn;
+ struct pci_dn *pdn;
int err = NOTIFY_OK;
switch (action) {
case OF_RECONFIG_ATTACH_NODE:
- pci = np->parent->data;
- if (pci) {
- update_dn_pci_info(np, pci->phb);
-
- /* Create EEH device for the OF node */
- eeh_dev_init(PCI_DN(np), pci->phb);
+ parent = of_get_parent(np);
+ pdn = parent ? PCI_DN(parent) : NULL;
+ if (pdn) {
+ /* Create pdn and EEH device */
+ update_dn_pci_info(np, pdn->phb);
+ eeh_dev_init(PCI_DN(np), pdn->phb);
}
+
+ of_node_put(parent);
break;
case OF_RECONFIG_DETACH_NODE:
- pci = PCI_DN(np);
- if (pci)
- list_del(&pci->list);
+ pdn = PCI_DN(np);
+ if (pdn)
+ list_del(&pdn->list);
break;
default:
err = NOTIFY_DONE;
@@ -493,18 +496,7 @@ static void __init find_and_init_phbs(void)
* PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
* in chosen.
*/
- if (of_chosen) {
- const int *prop;
-
- prop = of_get_property(of_chosen,
- "linux,pci-probe-only", NULL);
- if (prop) {
- if (*prop)
- pci_add_flags(PCI_PROBE_ONLY);
- else
- pci_clear_flags(PCI_PROBE_ONLY);
- }
- }
+ of_pci_check_probe_only();
}
static void __init pSeries_setup_arch(void)
@@ -835,10 +827,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
return PCI_PROBE_NORMAL;
}
-#ifndef CONFIG_PCI
-void pSeries_final_fixup(void) { }
-#endif
-
struct pci_controller_ops pseries_pci_controller_ops = {
.probe_mode = pSeries_pci_probe_mode,
};