diff options
Diffstat (limited to 'kernel/arch/powerpc/platforms')
92 files changed, 3741 insertions, 1506 deletions
diff --git a/kernel/arch/powerpc/platforms/512x/Kconfig b/kernel/arch/powerpc/platforms/512x/Kconfig index 5aa3f4b53..f09016f6b 100644 --- a/kernel/arch/powerpc/platforms/512x/Kconfig +++ b/kernel/arch/powerpc/platforms/512x/Kconfig @@ -7,8 +7,14 @@ config PPC_MPC512x select PPC_PCI_CHOICE select FSL_PCI if PCI select ARCH_WANT_OPTIONAL_GPIOLIB - select USB_EHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_DESC + select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD + select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD + +config MPC512x_LPBFIFO + tristate "MPC512x LocalPlus Bus FIFO driver" + depends on PPC_MPC512x && MPC512X_DMA + help + Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC). config MPC5121_ADS bool "Freescale MPC5121E ADS" diff --git a/kernel/arch/powerpc/platforms/512x/Makefile b/kernel/arch/powerpc/platforms/512x/Makefile index 01693121a..f47d42295 100644 --- a/kernel/arch/powerpc/platforms/512x/Makefile +++ b/kernel/arch/powerpc/platforms/512x/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o obj-y += mpc512x_shared.o obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o +obj-$(CONFIG_MPC512x_LPBFIFO) += mpc512x_lpbfifo.o obj-$(CONFIG_PDM360NG) += pdm360ng.o diff --git a/kernel/arch/powerpc/platforms/512x/clock-commonclk.c b/kernel/arch/powerpc/platforms/512x/clock-commonclk.c index f691bcabd..c50ea76ba 100644 --- a/kernel/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/kernel/arch/powerpc/platforms/512x/clock-commonclk.c @@ -12,6 +12,7 @@ */ #include <linux/bitops.h> +#include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/clkdev.h> #include <linux/device.h> diff --git a/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index ca3a062ed..0035d146d 100644 --- a/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/kernel/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -104,9 +104,10 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, return irq_linear_revmap(cpld_pic_host, cpld_irq); } -static void -cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void cpld_pic_cascade(struct irq_desc *desc) { + unsigned int irq; + irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); if (irq != NO_IRQ) { @@ -123,7 +124,8 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) } static int -cpld_pic_host_match(struct irq_domain *h, struct device_node *node) +cpld_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return cpld_pic_node == node; } diff --git a/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c new file mode 100644 index 000000000..8eb82b043 --- /dev/null +++ b/kernel/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -0,0 +1,540 @@ +/* + * The driver for Freescale MPC512x LocalPlus Bus FIFO + * (called SCLPC in the Reference Manual). + * + * Copyright (C) 2013-2015 Alexander Popov <alex.popov@linux.com>. + * + * This file is released under the GPLv2. + */ + +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <asm/mpc5121.h> +#include <asm/io.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> + +#define DRV_NAME "mpc512x_lpbfifo" + +struct cs_range { + u32 csnum; + u32 base; /* must be zero */ + u32 addr; + u32 size; +}; + +static struct lpbfifo_data { + spinlock_t lock; /* for protecting lpbfifo_data */ + phys_addr_t regs_phys; + resource_size_t regs_size; + struct mpc512x_lpbfifo __iomem *regs; + int irq; + struct cs_range *cs_ranges; + size_t cs_n; + struct dma_chan *chan; + struct mpc512x_lpbfifo_request *req; + dma_addr_t ram_bus_addr; + bool wait_lpbfifo_irq; + bool wait_lpbfifo_callback; +} lpbfifo; + +/* + * A data transfer from RAM to some device on LPB is finished + * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback() + * have been called. We execute the callback registered in + * mpc512x_lpbfifo_request just after that. + * But for a data transfer from some device on LPB to RAM we don't enable + * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag + * automatically disables LPBFIFO reading request to the DMA controller + * and the data transfer hangs. So the callback registered in + * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback(). + */ + +/* + * mpc512x_lpbfifo_irq - IRQ handler for LPB FIFO + */ +static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param) +{ + struct device *dev = (struct device *)param; + struct mpc512x_lpbfifo_request *req = NULL; + unsigned long flags; + u32 status; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + if (!lpbfifo.regs) + goto end; + + req = lpbfifo.req; + if (!req || req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) { + dev_err(dev, "bogus LPBFIFO IRQ\n"); + goto end; + } + + status = in_be32(&lpbfifo.regs->status); + if (status != MPC512X_SCLPC_SUCCESS) { + dev_err(dev, "DMA transfer from RAM to peripheral failed\n"); + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + goto end; + } + /* Clear the interrupt flag */ + out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS); + + lpbfifo.wait_lpbfifo_irq = false; + + if (lpbfifo.wait_lpbfifo_callback) + goto end; + + /* Transfer is finished, set the FIFO as idle */ + lpbfifo.req = NULL; + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + if (req->callback) + req->callback(req); + + return IRQ_HANDLED; + + end: + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return IRQ_HANDLED; +} + +/* + * mpc512x_lpbfifo_callback is called by DMA driver when + * DMA transaction is finished. + */ +static void mpc512x_lpbfifo_callback(void *param) +{ + unsigned long flags; + struct mpc512x_lpbfifo_request *req = NULL; + enum dma_data_direction dir; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + if (!lpbfifo.regs) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return; + } + + req = lpbfifo.req; + if (!req) { + pr_err("bogus LPBFIFO callback\n"); + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return; + } + + /* Release the mapping */ + if (req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) + dir = DMA_TO_DEVICE; + else + dir = DMA_FROM_DEVICE; + dma_unmap_single(lpbfifo.chan->device->dev, + lpbfifo.ram_bus_addr, req->size, dir); + + lpbfifo.wait_lpbfifo_callback = false; + + if (!lpbfifo.wait_lpbfifo_irq) { + /* Transfer is finished, set the FIFO as idle */ + lpbfifo.req = NULL; + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + if (req->callback) + req->callback(req); + } else { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + } +} + +static int mpc512x_lpbfifo_kick(void) +{ + u32 bits; + bool no_incr = false; + u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */ + u32 cs = 0; + size_t i; + struct dma_device *dma_dev = NULL; + struct scatterlist sg; + enum dma_data_direction dir; + struct dma_slave_config dma_conf = {}; + struct dma_async_tx_descriptor *dma_tx = NULL; + dma_cookie_t cookie; + int ret; + + /* + * 1. Fit the requirements: + * - the packet size must be a multiple of 4 since FIFO Data Word + * Register allows only full-word access according the Reference + * Manual; + * - the physical address of the device on LPB and the packet size + * must be aligned on BPT (bytes per transaction) or 8-bytes + * boundary according the Reference Manual; + * - but we choose DMA maxburst equal (or very close to) BPT to prevent + * DMA controller from overtaking FIFO and causing FIFO underflow + * error. So we force the packet size to be aligned on BPT boundary + * not to confuse DMA driver which requires the packet size to be + * aligned on maxburst boundary; + * - BPT should be set to the LPB device port size for operation with + * disabled auto-incrementing according Reference Manual. + */ + if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4)) + return -EINVAL; + + if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) { + bpt = lpbfifo.req->portsize; + no_incr = true; + } + + while (bpt > 1) { + if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) && + IS_ALIGNED(lpbfifo.req->size, bpt)) { + break; + } + + if (no_incr) + return -EINVAL; + + bpt >>= 1; + } + dma_conf.dst_maxburst = max(bpt, 0x4u) / 4; + dma_conf.src_maxburst = max(bpt, 0x4u) / 4; + + for (i = 0; i < lpbfifo.cs_n; i++) { + phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr; + phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size; + phys_addr_t access_start = lpbfifo.req->dev_phys_addr; + phys_addr_t access_end = access_start + lpbfifo.req->size; + + if (access_start >= cs_start && access_end <= cs_end) { + cs = lpbfifo.cs_ranges[i].csnum; + break; + } + } + if (i == lpbfifo.cs_n) + return -EFAULT; + + /* 2. Prepare DMA */ + dma_dev = lpbfifo.chan->device; + + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) { + dir = DMA_TO_DEVICE; + dma_conf.direction = DMA_MEM_TO_DEV; + dma_conf.dst_addr = lpbfifo.regs_phys + + offsetof(struct mpc512x_lpbfifo, data_word); + } else { + dir = DMA_FROM_DEVICE; + dma_conf.direction = DMA_DEV_TO_MEM; + dma_conf.src_addr = lpbfifo.regs_phys + + offsetof(struct mpc512x_lpbfifo, data_word); + } + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + + /* Make DMA channel work with LPB FIFO data register */ + if (dma_dev->device_config(lpbfifo.chan, &dma_conf)) { + ret = -EINVAL; + goto err_dma_prep; + } + + sg_init_table(&sg, 1); + + sg_dma_address(&sg) = dma_map_single(dma_dev->dev, + lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir); + if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg))) + return -EFAULT; + + lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */ + + sg_dma_len(&sg) = lpbfifo.req->size; + + dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg, + 1, dma_conf.direction, 0); + if (!dma_tx) { + ret = -ENOSPC; + goto err_dma_prep; + } + dma_tx->callback = mpc512x_lpbfifo_callback; + dma_tx->callback_param = NULL; + + /* 3. Prepare FIFO */ + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + out_be32(&lpbfifo.regs->enable, 0x0); + + /* + * Configure the watermarks for write operation (RAM->DMA->FIFO->dev): + * - high watermark 7 words according the Reference Manual, + * - low watermark 512 bytes (half of the FIFO). + * These watermarks don't work for read operation since the + * MPC512X_SCLPC_FLUSH bit is set (according the Reference Manual). + */ + out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7)); + out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200)); + + /* + * Start address is a physical address of the region which belongs + * to the device on the LocalPlus Bus + */ + out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr); + + /* + * Configure chip select, transfer direction, address increment option + * and bytes per transaction option + */ + bits = MPC512X_SCLPC_CS(cs); + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) + bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH; + if (no_incr) + bits |= MPC512X_SCLPC_DAI; + bits |= MPC512X_SCLPC_BPT(bpt); + out_be32(&lpbfifo.regs->ctrl, bits); + + /* Unmask irqs */ + bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE; + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) + bits |= MPC512X_SCLPC_NORM_INT_ENABLE; + else + lpbfifo.wait_lpbfifo_irq = false; + + out_be32(&lpbfifo.regs->enable, bits); + + /* 4. Set packet size and kick FIFO off */ + bits = lpbfifo.req->size | MPC512X_SCLPC_START; + out_be32(&lpbfifo.regs->pkt_size, bits); + + /* 5. Finally kick DMA off */ + cookie = dma_tx->tx_submit(dma_tx); + if (dma_submit_error(cookie)) { + ret = -ENOSPC; + goto err_dma_submit; + } + + return 0; + + err_dma_submit: + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + err_dma_prep: + dma_unmap_single(dma_dev->dev, sg_dma_address(&sg), + lpbfifo.req->size, dir); + return ret; +} + +static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req) +{ + int ret = 0; + + if (!lpbfifo.regs) + return -ENODEV; + + /* Check whether a transfer is in progress */ + if (lpbfifo.req) + return -EBUSY; + + lpbfifo.wait_lpbfifo_irq = true; + lpbfifo.wait_lpbfifo_callback = true; + lpbfifo.req = req; + + ret = mpc512x_lpbfifo_kick(); + if (ret != 0) + lpbfifo.req = NULL; /* Set the FIFO as idle */ + + return ret; +} + +int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&lpbfifo.lock, flags); + ret = mpc512x_lpbfifo_submit_locked(req); + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + return ret; +} +EXPORT_SYMBOL(mpc512x_lpbfifo_submit); + +/* + * LPBFIFO driver uses "ranges" property of "localbus" device tree node + * for being able to determine the chip select number of a client device + * ordering a DMA transfer. + */ +static int get_cs_ranges(struct device *dev) +{ + int ret = -ENODEV; + struct device_node *lb_node; + const u32 *addr_cells_p; + const u32 *size_cells_p; + int proplen; + size_t i; + + lb_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus"); + if (!lb_node) + return ret; + + /* + * The node defined as compatible with 'fsl,mpc5121-localbus' + * should have two address cells and one size cell. + * Every item of its ranges property should consist of: + * - the first address cell which is the chipselect number; + * - the second address cell which is the offset in the chipselect, + * must be zero. + * - CPU address of the beginning of an access window; + * - the only size cell which is the size of an access window. + */ + addr_cells_p = of_get_property(lb_node, "#address-cells", NULL); + size_cells_p = of_get_property(lb_node, "#size-cells", NULL); + if (addr_cells_p == NULL || *addr_cells_p != 2 || + size_cells_p == NULL || *size_cells_p != 1) { + goto end; + } + + proplen = of_property_count_u32_elems(lb_node, "ranges"); + if (proplen <= 0 || proplen % 4 != 0) + goto end; + + lpbfifo.cs_n = proplen / 4; + lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n, + sizeof(struct cs_range), GFP_KERNEL); + if (!lpbfifo.cs_ranges) + goto end; + + if (of_property_read_u32_array(lb_node, "ranges", + (u32 *)lpbfifo.cs_ranges, proplen) != 0) { + goto end; + } + + for (i = 0; i < lpbfifo.cs_n; i++) { + if (lpbfifo.cs_ranges[i].base != 0) + goto end; + } + + ret = 0; + + end: + of_node_put(lb_node); + return ret; +} + +static int mpc512x_lpbfifo_probe(struct platform_device *pdev) +{ + struct resource r; + int ret = 0; + + memset(&lpbfifo, 0, sizeof(struct lpbfifo_data)); + spin_lock_init(&lpbfifo.lock); + + lpbfifo.chan = dma_request_slave_channel(&pdev->dev, "rx-tx"); + if (lpbfifo.chan == NULL) + return -EPROBE_DEFER; + + if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) { + dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n"); + ret = -ENODEV; + goto err0; + } + + lpbfifo.regs_phys = r.start; + lpbfifo.regs_size = resource_size(&r); + + if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys, + lpbfifo.regs_size, DRV_NAME)) { + dev_err(&pdev->dev, "unable to request region\n"); + ret = -EBUSY; + goto err0; + } + + lpbfifo.regs = devm_ioremap(&pdev->dev, + lpbfifo.regs_phys, lpbfifo.regs_size); + if (!lpbfifo.regs) { + dev_err(&pdev->dev, "mapping registers failed\n"); + ret = -ENOMEM; + goto err0; + } + + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + + if (get_cs_ranges(&pdev->dev) != 0) { + dev_err(&pdev->dev, "bad '/localbus' device tree node\n"); + ret = -ENODEV; + goto err0; + } + + lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (lpbfifo.irq == NO_IRQ) { + dev_err(&pdev->dev, "mapping irq failed\n"); + ret = -ENODEV; + goto err0; + } + + if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0, + DRV_NAME, &pdev->dev) != 0) { + dev_err(&pdev->dev, "requesting irq failed\n"); + ret = -ENODEV; + goto err1; + } + + dev_info(&pdev->dev, "probe succeeded\n"); + return 0; + + err1: + irq_dispose_mapping(lpbfifo.irq); + err0: + dma_release_channel(lpbfifo.chan); + return ret; +} + +static int mpc512x_lpbfifo_remove(struct platform_device *pdev) +{ + unsigned long flags; + struct dma_device *dma_dev = lpbfifo.chan->device; + struct mpc512x_lpbfifo __iomem *regs = NULL; + + spin_lock_irqsave(&lpbfifo.lock, flags); + regs = lpbfifo.regs; + lpbfifo.regs = NULL; + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + dma_dev->device_terminate_all(lpbfifo.chan); + out_be32(®s->enable, MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + + free_irq(lpbfifo.irq, &pdev->dev); + irq_dispose_mapping(lpbfifo.irq); + dma_release_channel(lpbfifo.chan); + + return 0; +} + +static const struct of_device_id mpc512x_lpbfifo_match[] = { + { .compatible = "fsl,mpc512x-lpbfifo", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match); + +static struct platform_driver mpc512x_lpbfifo_driver = { + .probe = mpc512x_lpbfifo_probe, + .remove = mpc512x_lpbfifo_remove, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = mpc512x_lpbfifo_match, + }, +}; + +module_platform_driver(mpc512x_lpbfifo_driver); + +MODULE_AUTHOR("Alexander Popov <alex.popov@linux.com>"); +MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/arch/powerpc/platforms/52xx/media5200.c b/kernel/arch/powerpc/platforms/52xx/media5200.c index 32cae33c4..8fb95480f 100644 --- a/kernel/arch/powerpc/platforms/52xx/media5200.c +++ b/kernel/arch/powerpc/platforms/52xx/media5200.c @@ -80,7 +80,7 @@ static struct irq_chip media5200_irq_chip = { .irq_mask_ack = media5200_irq_mask, }; -void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int sub_virq, val; diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index c949ca055..3048e34db 100644 --- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -191,9 +191,9 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { .irq_set_type = mpc52xx_gpt_irq_set_type, }; -void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { - struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq); + struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); int sub_virq; u32 status; @@ -724,7 +724,7 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) { struct mpc52xx_gpt_priv *gpt; - gpt = kzalloc(sizeof *gpt, GFP_KERNEL); + gpt = devm_kzalloc(&ofdev->dev, sizeof *gpt, GFP_KERNEL); if (!gpt) return -ENOMEM; @@ -732,10 +732,8 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev) gpt->dev = &ofdev->dev; gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node); gpt->regs = of_iomap(ofdev->dev.of_node, 0); - if (!gpt->regs) { - kfree(gpt); + if (!gpt->regs) return -ENOMEM; - } dev_set_drvdata(&ofdev->dev, gpt); diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index 251dcb90e..7bb42a010 100644 --- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -568,6 +568,7 @@ static const struct of_device_id mpc52xx_lpbfifo_match[] = { { .compatible = "fsl,mpc5200-lpbfifo", }, {}, }; +MODULE_DEVICE_TABLE(of, mpc52xx_lpbfifo_match); static struct platform_driver mpc52xx_lpbfifo_driver = { .driver = { diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c index e2d401ad8..6eb3b2aba 100644 --- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -12,7 +12,7 @@ #undef DEBUG -#include <asm/pci.h> +#include <linux/pci.h> #include <asm/mpc52xx.h> #include <asm/delay.h> #include <asm/machdep.h> diff --git a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2944bc84b..4fe2074c8 100644 --- a/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/kernel/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -196,7 +196,7 @@ static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) ctrl_reg |= (type << (22 - (l2irq * 2))); out_be32(&intr->ctrl, ctrl_reg); - __irq_set_handler_locked(d->irq, handler); + irq_set_handler_locked(d, handler); return 0; } diff --git a/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 74861a7fb..60e89fc9c 100644 --- a/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/kernel/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -78,7 +78,7 @@ static struct irq_chip pq2ads_pci_ic = { .irq_disable = pq2ads_pci_mask_irq }; -static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc) +static void pq2ads_pci_irq_demux(struct irq_desc *desc) { struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc); u32 stat, mask, pend; diff --git a/kernel/arch/powerpc/platforms/83xx/suspend.c b/kernel/arch/powerpc/platforms/83xx/suspend.c index c9adbfb65..fcbea4b51 100644 --- a/kernel/arch/powerpc/platforms/83xx/suspend.c +++ b/kernel/arch/powerpc/platforms/83xx/suspend.c @@ -445,5 +445,4 @@ static int pmc_init(void) { return platform_driver_register(&pmc_driver); } - -module_init(pmc_init); +device_initcall(pmc_init); diff --git a/kernel/arch/powerpc/platforms/85xx/Kconfig b/kernel/arch/powerpc/platforms/85xx/Kconfig index 2fb4b2436..97915feff 100644 --- a/kernel/arch/powerpc/platforms/85xx/Kconfig +++ b/kernel/arch/powerpc/platforms/85xx/Kconfig @@ -282,7 +282,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS/RDB + P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB endif # FSL_SOC_BOOKE diff --git a/kernel/arch/powerpc/platforms/85xx/c293pcie.c b/kernel/arch/powerpc/platforms/85xx/c293pcie.c index 84476b646..61bc851e9 100644 --- a/kernel/arch/powerpc/platforms/85xx/c293pcie.c +++ b/kernel/arch/powerpc/platforms/85xx/c293pcie.c @@ -66,10 +66,6 @@ define_machine(c293_pcie) { .probe = c293_pcie_probe, .setup_arch = c293_pcie_setup_arch, .init_IRQ = c293_pcie_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, - .pcibios_fixup_phb = fsl_pcibios_fixup_phb, -#endif .get_irq = mpic_get_irq, .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, diff --git a/kernel/arch/powerpc/platforms/85xx/common.c b/kernel/arch/powerpc/platforms/85xx/common.c index 7bfb9b184..23791de7b 100644 --- a/kernel/arch/powerpc/platforms/85xx/common.c +++ b/kernel/arch/powerpc/platforms/85xx/common.c @@ -49,7 +49,7 @@ int __init mpc85xx_common_publish_devices(void) return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL); } #ifdef CONFIG_CPM2 -static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm2_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq; diff --git a/kernel/arch/powerpc/platforms/85xx/corenet_generic.c b/kernel/arch/powerpc/platforms/85xx/corenet_generic.c index 9824d2cf7..46d05c94a 100644 --- a/kernel/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/kernel/arch/powerpc/platforms/85xx/corenet_generic.c @@ -150,12 +150,18 @@ static const char * const boards[] __initconst = { "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", + "fsl,T1023RDB", + "fsl,T1024QDS", + "fsl,T1024RDB", + "fsl,T1040D4RDB", + "fsl,T1042D4RDB", "fsl,T1040QDS", "fsl,T1042QDS", "fsl,T1040RDB", "fsl,T1042RDB", "fsl,T1042RDB_PI", "keymile,kmcoge4", + "varisys,CYRUS", NULL }; @@ -209,7 +215,17 @@ define_machine(corenet_generic) { .pcibios_fixup_bus = fsl_pcibios_fixup_bus, .pcibios_fixup_phb = fsl_pcibios_fixup_phb, #endif +/* + * Core reset may cause issues if using the proxy mode of MPIC. + * So, use the mixed mode of MPIC if enabling CPU hotplug. + * + * Likewise, problems have been seen with kexec when coreint is enabled. + */ +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) + .get_irq = mpic_get_irq, +#else .get_irq = mpic_get_coreint_irq, +#endif .restart = fsl_rstcr_restart, .calibrate_decr = generic_calibrate_decr, .progress = udbg_progress, diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c index b0753e222..5ac70de3e 100644 --- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -192,8 +192,7 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus) } #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade_handler(unsigned int irq, - struct irq_desc *desc) +static void mpc85xx_8259_cascade_handler(struct irq_desc *desc) { unsigned int cascade_irq = i8259_irq(); @@ -202,7 +201,7 @@ static void mpc85xx_8259_cascade_handler(unsigned int irq, generic_handle_irq(cascade_irq); /* check for any interrupts from the shared IRQ line */ - handle_fasteoi_irq(irq, desc); + handle_fasteoi_irq(desc); } static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id) diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c index ffdf02121..f858306db 100644 --- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc85xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c index a392e94a0..f0be439ce 100644 --- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -34,6 +34,7 @@ #include <linux/of_device.h> #include <linux/phy.h> #include <linux/memblock.h> +#include <linux/fsl/guts.h> #include <linux/atomic.h> #include <asm/time.h> @@ -51,7 +52,6 @@ #include <asm/qe_ic.h> #include <asm/mpic.h> #include <asm/swiotlb.h> -#include <asm/fsl_guts.h> #include "smp.h" #include "mpc85xx.h" diff --git a/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index e358bed66..50dcc00a0 100644 --- a/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/kernel/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -17,6 +17,7 @@ #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/of_platform.h> +#include <linux/fsl/guts.h> #include <asm/time.h> #include <asm/machdep.h> @@ -27,7 +28,6 @@ #include <asm/mpic.h> #include <asm/qe.h> #include <asm/qe_ic.h> -#include <asm/fsl_guts.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> diff --git a/kernel/arch/powerpc/platforms/85xx/p1022_ds.c b/kernel/arch/powerpc/platforms/85xx/p1022_ds.c index 6ac986d3f..371df822e 100644 --- a/kernel/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/kernel/arch/powerpc/platforms/85xx/p1022_ds.c @@ -16,6 +16,7 @@ * kind, whether express or implied. */ +#include <linux/fsl/guts.h> #include <linux/pci.h> #include <linux/of_platform.h> #include <asm/div64.h> @@ -25,7 +26,6 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> #include <asm/udbg.h> -#include <asm/fsl_guts.h> #include <asm/fsl_lbc.h> #include "smp.h" diff --git a/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c b/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c index 680232d6b..5087becaa 100644 --- a/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/kernel/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -12,6 +12,7 @@ * kind, whether express or implied. */ +#include <linux/fsl/guts.h> #include <linux/pci.h> #include <linux/of_platform.h> #include <asm/div64.h> @@ -21,7 +22,6 @@ #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> #include <asm/udbg.h> -#include <asm/fsl_guts.h> #include "smp.h" #include "mpc85xx.h" diff --git a/kernel/arch/powerpc/platforms/85xx/smp.c b/kernel/arch/powerpc/platforms/85xx/smp.c index 8631ac5f0..6b107cea1 100644 --- a/kernel/arch/powerpc/platforms/85xx/smp.c +++ b/kernel/arch/powerpc/platforms/85xx/smp.c @@ -19,6 +19,7 @@ #include <linux/kexec.h> #include <linux/highmem.h> #include <linux/cpu.h> +#include <linux/fsl/guts.h> #include <asm/machdep.h> #include <asm/pgtable.h> @@ -26,7 +27,6 @@ #include <asm/mpic.h> #include <asm/cacheflush.h> #include <asm/dbell.h> -#include <asm/fsl_guts.h> #include <asm/code-patching.h> #include <asm/cputhreads.h> @@ -173,15 +173,22 @@ static inline u32 read_spin_table_addr_l(void *spin_table) static void wake_hw_thread(void *info) { void fsl_secondary_thread_init(void); - unsigned long imsr1, inia1; + unsigned long imsr, inia; int nr = *(const int *)info; - imsr1 = MSR_KERNEL; - inia1 = *(unsigned long *)fsl_secondary_thread_init; + imsr = MSR_KERNEL; + inia = *(unsigned long *)fsl_secondary_thread_init; - mttmr(TMRN_IMSR1, imsr1); - mttmr(TMRN_INIA1, inia1); - mtspr(SPRN_TENS, TEN_THREAD(1)); + if (cpu_thread_in_core(nr) == 0) { + /* For when we boot on a secondary thread with kdump */ + mttmr(TMRN_IMSR0, imsr); + mttmr(TMRN_INIA0, inia); + mtspr(SPRN_TENS, TEN_THREAD(0)); + } else { + mttmr(TMRN_IMSR1, imsr); + mttmr(TMRN_INIA1, inia); + mtspr(SPRN_TENS, TEN_THREAD(1)); + } smp_generic_kick_cpu(nr); } @@ -224,6 +231,12 @@ static int smp_85xx_kick_cpu(int nr) smp_call_function_single(primary, wake_hw_thread, &nr, 0); return 0; + } else if (cpu_thread_in_core(boot_cpuid) != 0 && + cpu_first_thread_sibling(boot_cpuid) == nr) { + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + smp_call_function_single(boot_cpuid, wake_hw_thread, &nr, 0); } #endif @@ -331,13 +344,14 @@ struct smp_ops_t smp_85xx_ops = { .cpu_disable = generic_cpu_disable, .cpu_die = generic_cpu_die, #endif -#ifdef CONFIG_KEXEC +#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64) .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, #endif }; #ifdef CONFIG_KEXEC +#ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) @@ -345,6 +359,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) local_irq_disable(); if (secondary) { + __flush_disable_L1(); atomic_inc(&kexec_down_cpus); /* loop forever */ while (1); @@ -356,62 +371,67 @@ static void mpc85xx_smp_kexec_down(void *arg) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0,1); } - -static void map_and_flush(unsigned long paddr) +#else +void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned long kaddr = (unsigned long)kmap_atomic(page); + int cpu = smp_processor_id(); + int sibling = cpu_last_thread_sibling(cpu); + bool notified = false; + int disable_cpu; + int disable_threadbit = 0; + long start = mftb(); + long now; - flush_dcache_range(kaddr, kaddr + PAGE_SIZE); - kunmap_atomic((void *)kaddr); -} + local_irq_disable(); + hard_irq_disable(); + mpic_teardown_this_cpu(secondary); -/** - * Before we reset the other cores, we need to flush relevant cache - * out to memory so we don't get anything corrupted, some of these flushes - * are performed out of an overabundance of caution as interrupts are not - * disabled yet and we can switch cores - */ -static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image) -{ - kimage_entry_t *ptr, entry; - unsigned long paddr; - int i; - - if (image->type == KEXEC_TYPE_DEFAULT) { - /* normal kexec images are stored in temporary pages */ - for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); - ptr = (entry & IND_INDIRECTION) ? - phys_to_virt(entry & PAGE_MASK) : ptr + 1) { - if (!(entry & IND_DESTINATION)) { - map_and_flush(entry); + if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) { + /* + * We enter the crash kernel on whatever cpu crashed, + * even if it's a secondary thread. If that's the case, + * disable the corresponding primary thread. + */ + disable_threadbit = 1; + disable_cpu = cpu_first_thread_sibling(cpu); + } else if (sibling != crashing_cpu && + cpu_thread_in_core(cpu) == 0 && + cpu_thread_in_core(sibling) != 0) { + disable_threadbit = 2; + disable_cpu = sibling; + } + + if (disable_threadbit) { + while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + now = mftb(); + if (!notified && now - start > 1000000) { + pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n", + __func__, smp_processor_id(), + disable_cpu, + paca[disable_cpu].kexec_state); + notified = true; } } - /* flush out last IND_DONE page */ - map_and_flush(entry); - } else { - /* crash type kexec images are copied to the crash region */ - for (i = 0; i < image->nr_segments; i++) { - struct kexec_segment *seg = &image->segment[i]; - for (paddr = seg->mem; paddr < seg->mem + seg->memsz; - paddr += PAGE_SIZE) { - map_and_flush(paddr); - } + + if (notified) { + pr_info("%s: cpu %d done waiting\n", + __func__, disable_cpu); } - } - /* also flush the kimage struct to be passed in as well */ - flush_dcache_range((unsigned long)image, - (unsigned long)image + sizeof(*image)); + mtspr(SPRN_TENC, disable_threadbit); + while (mfspr(SPRN_TENSR) & disable_threadbit) + cpu_relax(); + } } +#endif static void mpc85xx_smp_machine_kexec(struct kimage *image) { +#ifdef CONFIG_PPC32 int timeout = INT_MAX; int i, num_cpus = num_present_cpus(); - mpc85xx_smp_flush_dcache_kexec(image); - if (image->type == KEXEC_TYPE_DEFAULT) smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); @@ -429,6 +449,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) if ( i == smp_processor_id() ) continue; mpic_reset_core(i); } +#endif default_machine_kexec(image); } diff --git a/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 55a9682b9..b02d6a5bb 100644 --- a/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/kernel/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -91,9 +91,10 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) (irq_hw_number_t)i); } -void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void socrates_fpga_pic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq; /* diff --git a/kernel/arch/powerpc/platforms/85xx/twr_p102x.c b/kernel/arch/powerpc/platforms/85xx/twr_p102x.c index 1eadb6d0d..892e61351 100644 --- a/kernel/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/kernel/arch/powerpc/platforms/85xx/twr_p102x.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> +#include <linux/fsl/guts.h> #include <linux/pci.h> #include <linux/of_platform.h> @@ -23,7 +24,6 @@ #include <asm/mpic.h> #include <asm/qe.h> #include <asm/qe_ic.h> -#include <asm/fsl_guts.h> #include <sysdev/fsl_soc.h> #include <sysdev/fsl_pci.h> @@ -79,7 +79,7 @@ static void __init twr_p1025_setup_arch(void) mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) if (machine_is(twr_p1025)) { struct ccsr_guts __iomem *guts; @@ -101,7 +101,7 @@ static void __init twr_p1025_setup_arch(void) MPC85xx_PMUXCR_QE(12)); iounmap(guts); -#if defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_SERIAL_QE) /* On P1025TWR board, the UCC7 acted as UART port. * However, The UCC7's CTS pin is low level in default, * it will impact the transmission in full duplex diff --git a/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 55413a547..437a9c372 100644 --- a/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/kernel/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <linux/fsl/guts.h> #include <asm/time.h> #include <asm/machdep.h> @@ -38,7 +39,6 @@ #include <sysdev/fsl_pci.h> #include <sysdev/fsl_soc.h> #include <sysdev/simple_gpio.h> -#include <asm/fsl_guts.h> #include "mpc86xx.h" diff --git a/kernel/arch/powerpc/platforms/86xx/pic.c b/kernel/arch/powerpc/platforms/86xx/pic.c index d5b98c0f9..845defa1f 100644 --- a/kernel/arch/powerpc/platforms/86xx/pic.c +++ b/kernel/arch/powerpc/platforms/86xx/pic.c @@ -17,7 +17,7 @@ #include <asm/i8259.h> #ifdef CONFIG_PPC_I8259 -static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc86xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c b/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c index d30377470..c289fc77b 100644 --- a/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/kernel/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -214,7 +214,7 @@ void mpc8xx_restart(char *cmd) panic("Restart failed\n"); } -static void cpm_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq = cpm_get_irq(); diff --git a/kernel/arch/powerpc/platforms/Kconfig.cputype b/kernel/arch/powerpc/platforms/Kconfig.cputype index 7264e9119..142dff5e9 100644 --- a/kernel/arch/powerpc/platforms/Kconfig.cputype +++ b/kernel/arch/powerpc/platforms/Kconfig.cputype @@ -147,17 +147,6 @@ config 6xx depends on PPC32 && PPC_BOOK3S select PPC_HAVE_PMU_SUPPORT -config TUNE_CELL - bool "Optimize for Cell Broadband Engine" - depends on PPC64 && PPC_BOOK3S - help - Cause the compiler to optimize for the PPE of the Cell Broadband - Engine. This will make the code run considerably faster on Cell - but somewhat slower on other machines. This option only changes - the scheduling of instructions, not the selection of instructions - itself, so the resulting kernel will keep running on all other - machines. - # this is temp to handle compat with arch=ppc config 8xx bool @@ -405,6 +394,16 @@ config PPC_DOORBELL endmenu +config VDSO32 + def_bool y + depends on PPC32 || CPU_BIG_ENDIAN + help + This symbol controls whether we build the 32-bit VDSO. We obviously + want to do that if we're building a 32-bit kernel. If we're building + a 64-bit kernel then we only want a 32-bit VDSO if we're building for + big endian. That is because the only little endian configuration we + support is ppc64le which is 64-bit only. + choice prompt "Endianness selection" default CPU_BIG_ENDIAN @@ -421,6 +420,7 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN bool "Build little endian kernel" + depends on PPC_BOOK3S_64 select PPC64_BOOT_WRAPPER help Build a little endian kernel. diff --git a/kernel/arch/powerpc/platforms/cell/Kconfig b/kernel/arch/powerpc/platforms/cell/Kconfig index 2f23133ab..429fc59d2 100644 --- a/kernel/arch/powerpc/platforms/cell/Kconfig +++ b/kernel/arch/powerpc/platforms/cell/Kconfig @@ -25,7 +25,7 @@ config PPC_CELL_NATIVE config PPC_IBM_CELL_BLADE bool "IBM Cell Blade" - depends on PPC64 && PPC_BOOK3S + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN select PPC_CELL_NATIVE select PPC_OF_PLATFORM_PCI select PCI @@ -35,7 +35,7 @@ config PPC_IBM_CELL_BLADE config PPC_CELL_QPACE bool "IBM Cell - QPACE" - depends on PPC64 && PPC_BOOK3S + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN select PPC_CELL_COMMON config AXON_MSI @@ -57,21 +57,6 @@ config SPU_FS Units on machines implementing the Broadband Processor Architecture. -config SPU_FS_64K_LS - bool "Use 64K pages to map SPE local store" - # we depend on PPC_MM_SLICES for now rather than selecting - # it because we depend on hugetlbfs hooks being present. We - # will fix that when the generic code has been improved to - # not require hijacking hugetlbfs hooks. - depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES - default y - select PPC_HAS_HASH_64K - help - This option causes SPE local stores to be mapped in process - address spaces using 64K pages while the rest of the kernel - uses 4K pages. This can improve performances of applications - using multiple SPEs by lowering the TLB pressure on them. - config SPU_BASE bool default n diff --git a/kernel/arch/powerpc/platforms/cell/axon_msi.c b/kernel/arch/powerpc/platforms/cell/axon_msi.c index 623bd9614..aed771449 100644 --- a/kernel/arch/powerpc/platforms/cell/axon_msi.c +++ b/kernel/arch/powerpc/platforms/cell/axon_msi.c @@ -22,6 +22,7 @@ #include <asm/machdep.h> #include <asm/prom.h> +#include "cell.h" /* * MSIC registers, specified as offsets from dcr_base @@ -92,10 +93,10 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) dcr_write(msic->dcr_host, dcr_n, val); } -static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +static void axon_msi_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct axon_msic *msic = irq_get_handler_data(irq); + struct axon_msic *msic = irq_desc_get_handler_data(desc); u32 write_offset, msi; int idx; int retry = 0; @@ -212,7 +213,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) return -ENODEV; } - entry = list_first_entry(&dev->msi_list, struct msi_desc, list); + entry = first_pci_msi_entry(dev); for (; dn; dn = of_get_next_parent(dn)) { if (entry->msi_attrib.is_64) { @@ -268,7 +269,7 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (rc) return rc; - list_for_each_entry(entry, &dev->msi_list, list) { + for_each_pci_msi_entry(entry, dev) { virq = irq_create_direct_mapping(msic->irq_domain); if (virq == NO_IRQ) { dev_warn(&dev->dev, @@ -291,7 +292,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); - list_for_each_entry(entry, &dev->msi_list, list) { + for_each_pci_msi_entry(entry, dev) { if (entry->irq == NO_IRQ) continue; @@ -326,7 +327,7 @@ static void axon_msi_shutdown(struct platform_device *device) u32 tmp; pr_devel("axon_msi: disabling %s\n", - msic->irq_domain->of_node->full_name); + irq_domain_get_of_node(msic->irq_domain)->full_name); tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; msic_dcr_write(msic, MSIC_CTRL_REG, tmp); @@ -406,8 +407,8 @@ static int axon_msi_probe(struct platform_device *device) dev_set_drvdata(&device->dev, msic); - ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs; + cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs; axon_msi_debug_setup(dn, msic); diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.c b/kernel/arch/powerpc/platforms/cell/interrupt.c index 3af8324c1..9f609fc8d 100644 --- a/kernel/arch/powerpc/platforms/cell/interrupt.c +++ b/kernel/arch/powerpc/platforms/cell/interrupt.c @@ -99,11 +99,12 @@ static void iic_ioexc_eoi(struct irq_data *d) { } -static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) +static void iic_ioexc_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cbe_iic_regs __iomem *node_iic = (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; unsigned long bits, ack; int cascade; @@ -222,7 +223,8 @@ void iic_request_IPIs(void) #endif /* CONFIG_SMP */ -static int iic_host_match(struct irq_domain *h, struct device_node *node) +static int iic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return of_device_is_compatible(node, "IBM,CBEA-Internal-Interrupt-Controller"); diff --git a/kernel/arch/powerpc/platforms/cell/iommu.c b/kernel/arch/powerpc/platforms/cell/iommu.c index 21b502398..14a582b21 100644 --- a/kernel/arch/powerpc/platforms/cell/iommu.c +++ b/kernel/arch/powerpc/platforms/cell/iommu.c @@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np) return *ioid; } +static struct iommu_table_ops cell_iommu_ops = { + .set = tce_build_cell, + .clear = tce_free_cell +}; + static struct iommu_window * __init cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, unsigned long offset, unsigned long size, @@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_offset = (offset >> window->table.it_page_shift) + pte_offset; window->table.it_size = size >> window->table.it_page_shift; + window->table.it_ops = &cell_iommu_ops; iommu_init_table(&window->table, iommu->nid); @@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void) /* Setup various callbacks */ cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; ppc_md.dma_get_required_mask = cell_dma_get_required_mask; - ppc_md.tce_build = tce_build_cell; - ppc_md.tce_free = tce_free_cell; if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) goto bail; diff --git a/kernel/arch/powerpc/platforms/cell/ras.c b/kernel/arch/powerpc/platforms/cell/ras.c index e865d7481..2d4f60c01 100644 --- a/kernel/arch/powerpc/platforms/cell/ras.c +++ b/kernel/arch/powerpc/platforms/cell/ras.c @@ -123,7 +123,7 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order) area->nid = nid; area->order = order; - area->pages = alloc_pages_exact_node(area->nid, + area->pages = __alloc_pages_node(area->nid, GFP_KERNEL|__GFP_THISNODE, area->order); diff --git a/kernel/arch/powerpc/platforms/cell/spider-pic.c b/kernel/arch/powerpc/platforms/cell/spider-pic.c index 1f72f4ab6..54ee5743c 100644 --- a/kernel/arch/powerpc/platforms/cell/spider-pic.c +++ b/kernel/arch/powerpc/platforms/cell/spider-pic.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops spider_host_ops = { .xlate = spider_host_xlate, }; -static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc) +static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct spider_pic *pic = irq_desc_get_handler_data(desc); @@ -231,20 +231,23 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) const u32 *imap, *tmp; int imaplen, intsize, unit; struct device_node *iic; + struct device_node *of_node; + + of_node = irq_domain_get_of_node(pic->host); /* First, we check whether we have a real "interrupts" in the device * tree in case the device-tree is ever fixed */ - virq = irq_of_parse_and_map(pic->host->of_node, 0); + virq = irq_of_parse_and_map(of_node, 0); if (virq) return virq; /* Now do the horrible hacks */ - tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); + tmp = of_get_property(of_node, "#interrupt-cells", NULL); if (tmp == NULL) return NO_IRQ; intsize = *tmp; - imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen); + imap = of_get_property(of_node, "interrupt-map", &imaplen); if (imap == NULL || imaplen < (intsize + 1)) return NO_IRQ; iic = of_find_node_by_phandle(imap[intsize]); diff --git a/kernel/arch/powerpc/platforms/cell/spufs/file.c b/kernel/arch/powerpc/platforms/cell/spufs/file.c index d966bbe58..5038fd578 100644 --- a/kernel/arch/powerpc/platforms/cell/spufs/file.c +++ b/kernel/arch/powerpc/platforms/cell/spufs/file.c @@ -239,23 +239,6 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long address = (unsigned long)vmf->virtual_address; unsigned long pfn, offset; -#ifdef CONFIG_SPU_FS_64K_LS - struct spu_state *csa = &ctx->csa; - int psize; - - /* Check what page size we are using */ - psize = get_slice_psize(vma->vm_mm, address); - - /* Some sanity checking */ - BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); - - /* Wow, 64K, cool, we need to align the address though */ - if (csa->use_big_pages) { - BUG_ON(vma->vm_start & 0xffff); - address &= ~0xfffful; - } -#endif /* CONFIG_SPU_FS_64K_LS */ - offset = vmf->pgoff << PAGE_SHIFT; if (offset >= LS_SIZE) return VM_FAULT_SIGBUS; @@ -310,22 +293,6 @@ static const struct vm_operations_struct spufs_mem_mmap_vmops = { static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) { -#ifdef CONFIG_SPU_FS_64K_LS - struct spu_context *ctx = file->private_data; - struct spu_state *csa = &ctx->csa; - - /* Sanity check VMA alignment */ - if (csa->use_big_pages) { - pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," - " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, - vma->vm_pgoff); - if (vma->vm_start & 0xffff) - return -EINVAL; - if (vma->vm_pgoff & 0xf) - return -EINVAL; - } -#endif /* CONFIG_SPU_FS_64K_LS */ - if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; @@ -336,25 +303,6 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -#ifdef CONFIG_SPU_FS_64K_LS -static unsigned long spufs_get_unmapped_area(struct file *file, - unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct spu_context *ctx = file->private_data; - struct spu_state *csa = &ctx->csa; - - /* If not using big pages, fallback to normal MM g_u_a */ - if (!csa->use_big_pages) - return current->mm->get_unmapped_area(file, addr, len, - pgoff, flags); - - /* Else, try to obtain a 64K pages slice */ - return slice_get_unmapped_area(addr, len, flags, - MMU_PAGE_64K, 1); -} -#endif /* CONFIG_SPU_FS_64K_LS */ - static const struct file_operations spufs_mem_fops = { .open = spufs_mem_open, .release = spufs_mem_release, @@ -362,9 +310,6 @@ static const struct file_operations spufs_mem_fops = { .write = spufs_mem_write, .llseek = generic_file_llseek, .mmap = spufs_mem_mmap, -#ifdef CONFIG_SPU_FS_64K_LS - .get_unmapped_area = spufs_get_unmapped_area, -#endif }; static int spufs_ps_fault(struct vm_area_struct *vma, diff --git a/kernel/arch/powerpc/platforms/cell/spufs/inode.c b/kernel/arch/powerpc/platforms/cell/spufs/inode.c index 1ba6307be..11634fa7a 100644 --- a/kernel/arch/powerpc/platforms/cell/spufs/inode.c +++ b/kernel/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&d_inode(dir)->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index 147069938..b847e9403 100644 --- a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -31,7 +31,7 @@ #include "spufs.h" -static int spu_alloc_lscsa_std(struct spu_state *csa) +int spu_alloc_lscsa(struct spu_state *csa) { struct spu_lscsa *lscsa; unsigned char *p; @@ -48,7 +48,7 @@ static int spu_alloc_lscsa_std(struct spu_state *csa) return 0; } -static void spu_free_lscsa_std(struct spu_state *csa) +void spu_free_lscsa(struct spu_state *csa) { /* Clear reserved bit before vfree. */ unsigned char *p; @@ -61,123 +61,3 @@ static void spu_free_lscsa_std(struct spu_state *csa) vfree(csa->lscsa); } - -#ifdef CONFIG_SPU_FS_64K_LS - -#define SPU_64K_PAGE_SHIFT 16 -#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) -#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) - -int spu_alloc_lscsa(struct spu_state *csa) -{ - struct page **pgarray; - unsigned char *p; - int i, j, n_4k; - - /* Check availability of 64K pages */ - if (!spu_64k_pages_available()) - goto fail; - - csa->use_big_pages = 1; - - pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", - csa); - - /* First try to allocate our 64K pages. We need 5 of them - * with the current implementation. In the future, we should try - * to separate the lscsa with the actual local store image, thus - * allowing us to require only 4 64K pages per context - */ - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { - /* XXX This is likely to fail, we should use a special pool - * similar to what hugetlbfs does. - */ - csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, - SPU_64K_PAGE_ORDER); - if (csa->lscsa_pages[i] == NULL) - goto fail; - } - - pr_debug(" success ! creating vmap...\n"); - - /* Now we need to create a vmalloc mapping of these for the kernel - * and SPU context switch code to use. Currently, we stick to a - * normal kernel vmalloc mapping, which in our case will be 4K - */ - n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; - pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); - if (pgarray == NULL) - goto fail; - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) - for (j = 0; j < SPU_64K_PAGE_COUNT; j++) - /* We assume all the struct page's are contiguous - * which should be hopefully the case for an order 4 - * allocation.. - */ - pgarray[i * SPU_64K_PAGE_COUNT + j] = - csa->lscsa_pages[i] + j; - csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); - kfree(pgarray); - if (csa->lscsa == NULL) - goto fail; - - memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); - - /* Set LS pages reserved to allow for user-space mapping. - * - * XXX isn't that a bit obsolete ? I think we should just - * make sure the page count is high enough. Anyway, won't harm - * for now - */ - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) - SetPageReserved(vmalloc_to_page(p)); - - pr_debug(" all good !\n"); - - return 0; -fail: - pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); - spu_free_lscsa(csa); - return spu_alloc_lscsa_std(csa); -} - -void spu_free_lscsa(struct spu_state *csa) -{ - unsigned char *p; - int i; - - if (!csa->use_big_pages) { - spu_free_lscsa_std(csa); - return; - } - csa->use_big_pages = 0; - - if (csa->lscsa == NULL) - goto free_pages; - - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) - ClearPageReserved(vmalloc_to_page(p)); - - vunmap(csa->lscsa); - csa->lscsa = NULL; - - free_pages: - - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) - if (csa->lscsa_pages[i]) - __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); -} - -#else /* CONFIG_SPU_FS_64K_LS */ - -int spu_alloc_lscsa(struct spu_state *csa) -{ - return spu_alloc_lscsa_std(csa); -} - -void spu_free_lscsa(struct spu_state *csa) -{ - spu_free_lscsa_std(csa); -} - -#endif /* !defined(CONFIG_SPU_FS_64K_LS) */ diff --git a/kernel/arch/powerpc/platforms/chrp/setup.c b/kernel/arch/powerpc/platforms/chrp/setup.c index 15ebc4e8a..987d1b8d6 100644 --- a/kernel/arch/powerpc/platforms/chrp/setup.c +++ b/kernel/arch/powerpc/platforms/chrp/setup.c @@ -363,7 +363,7 @@ void __init chrp_setup_arch(void) if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0); } -static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void chrp_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 4cde8e7da..b7866e014 100644 --- a/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/kernel/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -108,7 +108,8 @@ static int flipper_pic_map(struct irq_domain *h, unsigned int virq, return 0; } -static int flipper_pic_match(struct irq_domain *h, struct device_node *np) +static int flipper_pic_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { return 1; } diff --git a/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index c269caee5..9b7975706 100644 --- a/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/kernel/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -120,11 +120,10 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) return irq_linear_revmap(h, irq); } -static void hlwd_pic_irq_cascade(unsigned int cascade_virq, - struct irq_desc *desc) +static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq); + struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); unsigned int virq; raw_spin_lock(&desc->lock); diff --git a/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c b/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c index 161330317..8f65aa374 100644 --- a/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/kernel/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -42,7 +42,7 @@ static phys_addr_t pci_membase; static u_char *restart; -static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mvme5100_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/kernel/arch/powerpc/platforms/maple/Kconfig b/kernel/arch/powerpc/platforms/maple/Kconfig index 1ea621a94..e359d0db0 100644 --- a/kernel/arch/powerpc/platforms/maple/Kconfig +++ b/kernel/arch/powerpc/platforms/maple/Kconfig @@ -1,5 +1,5 @@ config PPC_MAPLE - depends on PPC64 && PPC_BOOK3S + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN bool "Maple 970FX Evaluation Board" select PCI select MPIC diff --git a/kernel/arch/powerpc/platforms/pasemi/Kconfig b/kernel/arch/powerpc/platforms/pasemi/Kconfig index a2aeb327d..00d4b28cb 100644 --- a/kernel/arch/powerpc/platforms/pasemi/Kconfig +++ b/kernel/arch/powerpc/platforms/pasemi/Kconfig @@ -1,5 +1,5 @@ config PPC_PASEMI - depends on PPC64 && PPC_BOOK3S + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN bool "PA Semi SoC-based platforms" default n select MPIC diff --git a/kernel/arch/powerpc/platforms/pasemi/Makefile b/kernel/arch/powerpc/platforms/pasemi/Makefile index 8e8d4cae5..60b4e0fd9 100644 --- a/kernel/arch/powerpc/platforms/pasemi/Makefile +++ b/kernel/arch/powerpc/platforms/pasemi/Makefile @@ -1,2 +1,3 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o +obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/kernel/arch/powerpc/platforms/pasemi/iommu.c b/kernel/arch/powerpc/platforms/pasemi/iommu.c index b8f567b2e..c929644e7 100644 --- a/kernel/arch/powerpc/platforms/pasemi/iommu.c +++ b/kernel/arch/powerpc/platforms/pasemi/iommu.c @@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index, } } +static struct iommu_table_ops iommu_table_iobmap_ops = { + .set = iobmap_build, + .clear = iobmap_free +}; static void iommu_table_iobmap_setup(void) { @@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void) * Should probably be 8 (64 bytes) */ iommu_table_iobmap.it_blocksize = 4; + iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; iommu_init_table(&iommu_table_iobmap, 0); pr_debug(" <- %s\n", __func__); } @@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void) pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi; pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; - ppc_md.tce_build = iobmap_build; - ppc_md.tce_free = iobmap_free; set_pci_dma_ops(&dma_iommu_ops); } diff --git a/kernel/arch/powerpc/platforms/pasemi/msi.c b/kernel/arch/powerpc/platforms/pasemi/msi.c new file mode 100644 index 000000000..d9af76342 --- /dev/null +++ b/kernel/arch/powerpc/platforms/pasemi/msi.c @@ -0,0 +1,171 @@ +/* + * Copyright 2007, Olof Johansson, PA Semi + * + * Based on arch/powerpc/sysdev/mpic_u3msi.c: + * + * Copyright 2006, Segher Boessenkool, IBM Corporation. + * Copyright 2006-2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#include <linux/irq.h> +#include <linux/msi.h> +#include <asm/mpic.h> +#include <asm/prom.h> +#include <asm/hw_irq.h> +#include <asm/ppc-pci.h> +#include <asm/msi_bitmap.h> + +#include <sysdev/mpic.h> + +/* Allocate 16 interrupts per device, to give an alignment of 16, + * since that's the size of the grouping w.r.t. affinity. If someone + * needs more than 32 MSI's down the road we'll have to rethink this, + * but it should be OK for now. + */ +#define ALLOC_CHUNK 16 + +#define PASEMI_MSI_ADDR 0xfc080000 + +/* A bit ugly, can we get this from the pci_dev somehow? */ +static struct mpic *msi_mpic; + + +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + pci_msi_mask_irq(data); + mpic_mask_irq(data); +} + +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data); + pci_msi_unmask_irq(data); +} + +static struct irq_chip mpic_pasemi_msi_chip = { + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, + .irq_eoi = mpic_end_irq, + .irq_set_type = mpic_set_irq_type, + .irq_set_affinity = mpic_set_affinity, + .name = "PASEMI-MSI", +}; + +static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct msi_desc *entry; + irq_hw_number_t hwirq; + + pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); + + for_each_pci_msi_entry(entry, pdev) { + if (entry->irq == NO_IRQ) + continue; + + hwirq = virq_to_hw(entry->irq); + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); + } + + return; +} + +static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int virq; + struct msi_desc *entry; + struct msi_msg msg; + int hwirq; + + if (type == PCI_CAP_ID_MSIX) + pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); + pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", + pdev, nvec, type); + + msg.address_hi = 0; + msg.address_lo = PASEMI_MSI_ADDR; + + for_each_pci_msi_entry(entry, pdev) { + /* Allocate 16 interrupts for now, since that's the grouping for + * affinity. This can be changed later if it turns out 32 is too + * few MSIs for someone, but restrictions will apply to how the + * sources can be changed independently. + */ + hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, + ALLOC_CHUNK); + if (hwirq < 0) { + pr_debug("pasemi_msi: failed allocating hwirq\n"); + return hwirq; + } + + virq = irq_create_mapping(msi_mpic->irqhost, hwirq); + if (virq == NO_IRQ) { + pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", + hwirq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, + ALLOC_CHUNK); + return -ENOSPC; + } + + /* Vector on MSI is really an offset, the hardware adds + * it to the value written at the magic address. So set + * it to 0 to remain sane. + */ + mpic_set_vector(virq, 0); + + irq_set_msi_desc(virq, entry); + irq_set_chip(virq, &mpic_pasemi_msi_chip); + irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); + + pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ + "addr 0x%x\n", virq, hwirq, msg.address_lo); + + /* Likewise, the device writes [0...511] into the target + * register to generate MSI [512...1023] + */ + msg.data = hwirq-0x200; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +int mpic_pasemi_msi_init(struct mpic *mpic) +{ + int rc; + struct pci_controller *phb; + struct device_node *of_node; + + of_node = irq_domain_get_of_node(mpic->irqhost); + if (!of_node || + !of_device_is_compatible(of_node, + "pasemi,pwrficient-openpic")) + return -ENODEV; + + rc = mpic_msi_init_allocator(mpic); + if (rc) { + pr_debug("pasemi_msi: Error allocating bitmap!\n"); + return rc; + } + + pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); + + msi_mpic = mpic; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + } + + return 0; +} diff --git a/kernel/arch/powerpc/platforms/powermac/Kconfig b/kernel/arch/powerpc/platforms/powermac/Kconfig index 607124bae..43c606268 100644 --- a/kernel/arch/powerpc/platforms/powermac/Kconfig +++ b/kernel/arch/powerpc/platforms/powermac/Kconfig @@ -1,6 +1,6 @@ config PPC_PMAC bool "Apple PowerMac based machines" - depends on PPC_BOOK3S + depends on PPC_BOOK3S && CPU_BIG_ENDIAN select MPIC select PCI select PPC_INDIRECT_PCI if PPC32 diff --git a/kernel/arch/powerpc/platforms/powermac/pic.c b/kernel/arch/powerpc/platforms/powermac/pic.c index 59cfc9d63..6f4f8b060 100644 --- a/kernel/arch/powerpc/platforms/powermac/pic.c +++ b/kernel/arch/powerpc/platforms/powermac/pic.c @@ -268,7 +268,8 @@ static struct irqaction gatwick_cascade_action = { .name = "cascade", }; -static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node) +static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* We match all, we don't always have a node anyway */ return 1; diff --git a/kernel/arch/powerpc/platforms/powernv/Kconfig b/kernel/arch/powerpc/platforms/powernv/Kconfig index 4b044d8cb..604190cab 100644 --- a/kernel/arch/powerpc/platforms/powernv/Kconfig +++ b/kernel/arch/powerpc/platforms/powernv/Kconfig @@ -19,3 +19,10 @@ config PPC_POWERNV select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL default y + +config OPAL_PRD + tristate 'OPAL PRD driver' + depends on PPC_POWERNV + help + This enables the opal-prd driver, a facility to run processor + recovery diagnostics on OpenPower machines diff --git a/kernel/arch/powerpc/platforms/powernv/Makefile b/kernel/arch/powerpc/platforms/powernv/Makefile index 33e44f372..b9de7ef48 100644 --- a/kernel/arch/powerpc/platforms/powernv/Makefile +++ b/kernel/arch/powerpc/platforms/powernv/Makefile @@ -1,7 +1,8 @@ -obj-y += setup.o opal-wrappers.o opal.o opal-async.o +obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o opal-hmi.o opal-power.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o +obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o @@ -9,3 +10,4 @@ obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o +obj-$(CONFIG_OPAL_PRD) += opal-prd.o diff --git a/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c b/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab3d..2ba602591 100644 --- a/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/kernel/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -16,6 +16,7 @@ #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/list.h> #include <linux/msi.h> #include <linux/of.h> @@ -40,18 +41,13 @@ #include "pci.h" static bool pnv_eeh_nb_init = false; +static int eeh_event_irq = -EINVAL; -/** - * pnv_eeh_init - EEH platform dependent initialization - * - * EEH platform dependent initialization on powernv - */ static int pnv_eeh_init(void) { struct pci_controller *hose; struct pnv_phb *phb; - /* We require OPALv3 */ if (!firmware_has_feature(FW_FEATURE_OPALv3)) { pr_warn("%s: OPALv3 is required !\n", __func__); @@ -75,9 +71,9 @@ static int pnv_eeh_init(void) /* * PE#0 should be regarded as valid by EEH core * if it's not the reserved one. Currently, we - * have the reserved PE#0 and PE#127 for PHB3 + * have the reserved PE#255 and PE#127 for PHB3 * and P7IOC separately. So we should regard - * PE#0 as valid for P7IOC. + * PE#0 as valid for PHB3 and P7IOC. */ if (phb->ioda.reserved_pe != 0) eeh_add_flag(EEH_VALID_PE_ZERO); @@ -88,34 +84,22 @@ static int pnv_eeh_init(void) return 0; } -static int pnv_eeh_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t pnv_eeh_event(int irq, void *data) { - uint64_t changed_evts = (uint64_t)change; - /* - * We simply send special EEH event if EEH has - * been enabled, or clear pending events in - * case that we enable EEH soon + * We simply send a special EEH event if EEH has been + * enabled. We don't care about EEH events until we've + * finished processing the outstanding ones. Event processing + * gets unmasked in next_error() if EEH is enabled. */ - if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || - !(events & OPAL_EVENT_PCI_ERROR)) - return 0; + disable_irq_nosync(irq); if (eeh_enabled()) eeh_send_failure_event(NULL); - else - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - return 0; + return IRQ_HANDLED; } -static struct notifier_block pnv_eeh_nb = { - .notifier_call = pnv_eeh_event, - .next = NULL, - .priority = 0 -}; - #ifdef CONFIG_DEBUG_FS static ssize_t pnv_eeh_ei_write(struct file *filp, const char __user *user_buf, @@ -237,16 +221,28 @@ static int pnv_eeh_post_init(void) /* Register OPAL event notifier */ if (!pnv_eeh_nb_init) { - ret = opal_notifier_register(&pnv_eeh_nb); - if (ret) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); + eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); + if (eeh_event_irq < 0) { + pr_err("%s: Can't register OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return eeh_event_irq; + } + + ret = request_irq(eeh_event_irq, pnv_eeh_event, + IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); + if (ret < 0) { + irq_dispose_mapping(eeh_event_irq); + pr_err("%s: Can't request OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); return ret; } pnv_eeh_nb_init = true; } + if (!eeh_enabled()) + disable_irq(eeh_event_irq); + list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -282,33 +278,23 @@ static int pnv_eeh_post_init(void) #endif /* CONFIG_DEBUG_FS */ } - return ret; } -static int pnv_eeh_cap_start(struct pci_dn *pdn) +static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) { - u32 status; + int pos = PCI_CAPABILITY_LIST; + int cnt = 48; /* Maximal number of capabilities */ + u32 status, id; if (!pdn) return 0; + /* Check if the device supports capabilities */ pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); if (!(status & PCI_STATUS_CAP_LIST)) return 0; - return PCI_CAPABILITY_LIST; -} - -static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) -{ - int pos = pnv_eeh_cap_start(pdn); - int cnt = 48; /* Maximal number of capabilities */ - u32 id; - - if (!pos) - return 0; - while (cnt--) { pnv_pci_cfg_read(pdn, pos, 1, &pos); if (pos < 0x40) @@ -441,11 +427,14 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * that PE to block its config space. * * Broadcom Austin 4-ports NICs (14e4:1657) + * Broadcom Shiner 4-ports 1G NICs (14e4:168a) * Broadcom Shiner 2-ports 10G NICs (14e4:168e) */ if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && pdn->device_id == 0x1657) || (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168a) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && pdn->device_id == 0x168e)) edev->pe->state |= EEH_PE_CFG_RESTRICTED; @@ -455,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) * PCI devices of the PE are expected to be removed prior * to PE reset. */ - if (!edev->pe->bus) + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { edev->pe->bus = pci_find_bus(hose->global_number, pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } /* * Enable EEH explicitly so that we will do EEH check @@ -485,10 +477,9 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option) struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; bool freeze_pe = false; - int opt, ret = 0; + int opt; s64 rc; - /* Sanity check on option */ switch (option) { case EEH_OPT_DISABLE: return -EPERM; @@ -509,38 +500,37 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option) return -EINVAL; } - /* If PHB supports compound PE, to handle it */ + /* Freeze master and slave PEs if PHB supports compound PEs */ if (freeze_pe) { if (phb->freeze_pe) { phb->freeze_pe(phb, pe->addr); - } else { - rc = opal_pci_eeh_freeze_set(phb->opal_id, - pe->addr, opt); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld freezing " - "PHB#%x-PE#%x\n", - __func__, rc, - phb->hose->global_number, pe->addr); - ret = -EIO; - } + return 0; } - } else { - if (phb->unfreeze_pe) { - ret = phb->unfreeze_pe(phb, pe->addr, opt); - } else { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, - pe->addr, opt); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld enable %d " - "for PHB#%x-PE#%x\n", - __func__, rc, option, - phb->hose->global_number, pe->addr); - ret = -EIO; - } + + rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return -EIO; } + + return 0; } - return ret; + /* Unfreeze master and slave PEs if PHB supports */ + if (phb->unfreeze_pe) + return phb->unfreeze_pe(phb, pe->addr, opt); + + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", + __func__, rc, option, phb->hose->global_number, + pe->addr); + return -EIO; + } + + return 0; } /** @@ -979,7 +969,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) /** * pnv_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. @@ -1000,13 +990,13 @@ static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) if (ret != EEH_STATE_UNAVAILABLE) return ret; - max_wait -= mwait; if (max_wait <= 0) { pr_warn("%s: Timeout getting PE#%x's state (%d)\n", __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } + max_wait -= mwait; msleep(mwait); } @@ -1063,7 +1053,6 @@ static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, struct pnv_phb *phb = hose->private_data; s64 rc; - /* Sanity check on error type */ if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { pr_warn("%s: Invalid error type %d\n", @@ -1303,12 +1292,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) int state, ret = EEH_NEXT_ERR_NONE; /* - * While running here, it's safe to purge the event queue. - * And we should keep the cached OPAL notifier event sychronized - * between the kernel and firmware. + * While running here, it's safe to purge the event queue. The + * event should still be masked. */ eeh_remove_event(NULL, false); - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { /* @@ -1394,11 +1381,19 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) */ if (pnv_eeh_get_pe(hose, be64_to_cpu(frozen_pe_no), pe)) { - /* Try best to clear it */ pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", - hose->global_number, frozen_pe_no); + hose->global_number, be64_to_cpu(frozen_pe_no)); pr_info("EEH: PHB location: %s\n", eeh_pe_loc_get(phb_pe)); + + /* Dump PHB diag-data */ + rc = opal_pci_get_phb_diag_data2(phb->opal_id, + phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + if (rc == OPAL_SUCCESS) + pnv_pci_dump_phb_diag_data(hose, + phb->diag.blob); + + /* Try best to clear it */ opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); @@ -1477,6 +1472,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) break; } + /* Unmask the event */ + if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) + enable_irq(eeh_event_irq); + return ret; } diff --git a/kernel/arch/powerpc/platforms/powernv/idle.c b/kernel/arch/powerpc/platforms/powernv/idle.c new file mode 100644 index 000000000..59d735d2e --- /dev/null +++ b/kernel/arch/powerpc/platforms/powernv/idle.c @@ -0,0 +1,293 @@ +/* + * PowerNV cpuidle code + * + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/device.h> +#include <linux/cpu.h> + +#include <asm/firmware.h> +#include <asm/machdep.h> +#include <asm/opal.h> +#include <asm/cputhreads.h> +#include <asm/cpuidle.h> +#include <asm/code-patching.h> +#include <asm/smp.h> + +#include "powernv.h" +#include "subcore.h" + +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. + */ + uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. + */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + + +static void pnv_fastsleep_workaround_apply(void *info) + +{ + int rc; + int *err = info; + + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + if (rc) + *err = 1; +} + +/* + * Used to store fastsleep workaround state + * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) + * 1 - Workaround applied once, never undone. + */ +static u8 fastsleep_workaround_applyonce; + +static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); +} + +static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + cpumask_t primary_thread_mask; + int err; + u8 val; + + if (kstrtou8(buf, 0, &val) || val != 1) + return -EINVAL; + + if (fastsleep_workaround_applyonce == 1) + return count; + + /* + * fastsleep_workaround_applyonce = 1 implies + * fastsleep workaround needs to be left in 'applied' state on all + * the cores. Do this by- + * 1. Patching out the call to 'undo' workaround in fastsleep exit path + * 2. Sending ipi to all the cores which have atleast one online thread + * 3. Patching out the call to 'apply' workaround in fastsleep entry + * path + * There is no need to send ipi to cores which have all threads + * offlined, as last thread of the core entering fastsleep or deeper + * state would have applied workaround. + */ + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); + goto fail; + } + + get_online_cpus(); + primary_thread_mask = cpu_online_cores_map(); + on_each_cpu_mask(&primary_thread_mask, + pnv_fastsleep_workaround_apply, + &err, 1); + put_online_cpus(); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); + goto fail; + } + + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); + goto fail; + } + + fastsleep_workaround_applyonce = 1; + + return count; +fail: + return -EIO; +} + +static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, + show_fastsleep_workaround_applyonce, + store_fastsleep_workaround_applyonce); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + u32 *flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + goto out; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + goto out; + } + dt_idle_states = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-flags"); + if (dt_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + goto out; + } + + flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + goto out_free; + } + + for (i = 0; i < dt_idle_states; i++) + supported_cpuidle_states |= flags[i]; + + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } else { + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be applied. + */ + device_create_file(cpu_subsys.dev_root, + &dev_attr_fastsleep_workaround_applyonce); + } + + pnv_alloc_idle_core_states(); +out_free: + kfree(flags); +out: + return 0; +} +machine_subsys_initcall(powernv, pnv_init_idle_states); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-async.c b/kernel/arch/powerpc/platforms/powernv/opal-async.c index 693b6cdac..bdc8c0c71 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-async.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-async.c @@ -151,7 +151,7 @@ static struct notifier_block opal_async_comp_nb = { .priority = 0, }; -static int __init opal_async_comp_init(void) +int __init opal_async_comp_init(void) { struct device_node *opal_node; const __be32 *async; @@ -205,4 +205,3 @@ out_opal_node: out: return err; } -machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-dump.c b/kernel/arch/powerpc/platforms/powernv/opal-dump.c index 5aa9c1ce4..2ee96431f 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-dump.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-dump.c @@ -15,6 +15,7 @@ #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/delay.h> +#include <linux/interrupt.h> #include <asm/opal.h> @@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - + return sprintf(buf, "0x%x %s\n", dump_obj->type, dump_type_to_string(dump_obj->type)); } @@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, return dump; } -static int process_dump(void) +static irqreturn_t process_dump(int irq, void *data) { int rc; uint32_t dump_id, dump_size, dump_type; @@ -387,45 +388,13 @@ static int process_dump(void) if (!dump) return -1; - return 0; -} - -static void dump_work_fn(struct work_struct *work) -{ - process_dump(); + return IRQ_HANDLED; } -static DECLARE_WORK(dump_work, dump_work_fn); - -static void schedule_process_dump(void) -{ - schedule_work(&dump_work); -} - -/* - * New dump available notification - * - * Once we get notification, we add sysfs entries for it. - * We only fetch the dump on demand, and create sysfs asynchronously. - */ -static int dump_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - if (events & OPAL_EVENT_DUMP_AVAIL) - schedule_process_dump(); - - return 0; -} - -static struct notifier_block dump_nb = { - .notifier_call = dump_event, - .next = NULL, - .priority = 0 -}; - void __init opal_platform_dump_init(void) { int rc; + int dump_irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_DUMP_READ)) @@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void) return; } - rc = opal_notifier_register(&dump_nb); + dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL)); + if (!dump_irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, dump_irq); + return; + } + + rc = request_threaded_irq(dump_irq, NULL, process_dump, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "opal-dump", NULL); if (rc) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return; } diff --git a/kernel/arch/powerpc/platforms/powernv/opal-elog.c b/kernel/arch/powerpc/platforms/powernv/opal-elog.c index 38ce757e5..37f959bf3 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-elog.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-elog.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/of.h> #include <linux/slab.h> #include <linux/sysfs.h> @@ -236,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -250,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -269,31 +270,16 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} - -static DECLARE_WORK(elog_work, elog_work_fn); -static int elog_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - /* check for error log event */ - if (events & OPAL_EVENT_ERROR_LOG_AVAIL) - schedule_work(&elog_work); - return 0; + return IRQ_HANDLED; } -static struct notifier_block elog_nb = { - .notifier_call = elog_event, - .next = NULL, - .priority = 0 -}; - int __init opal_elog_init(void) { - int rc = 0; + int rc = 0, irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_ELOG_READ)) @@ -305,10 +291,18 @@ int __init opal_elog_init(void) return -1; } - rc = opal_notifier_register(&elog_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return rc; } diff --git a/kernel/arch/powerpc/platforms/powernv/opal-hmi.c b/kernel/arch/powerpc/platforms/powernv/opal-hmi.c index b322bfb51..d000f4e21 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-hmi.c @@ -35,9 +35,134 @@ struct OpalHmiEvtNode { struct list_head list; struct OpalHMIEvent hmi_evt; }; + +struct xstop_reason { + uint32_t xstop_reason; + const char *unit_failed; + const char *description; +}; + static LIST_HEAD(opal_hmi_evt_list); static DEFINE_SPINLOCK(opal_hmi_evt_lock); +static void print_core_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { CORE_CHECKSTOP_IFU_REGFILE, "IFU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_DURING_RECOV, "PC", + "Core checkstop during recovery" }, + { CORE_CHECKSTOP_ISU_REGFILE, "ISU", + "RegFile core check stop (mapper error)" }, + { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" }, + { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" }, + { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC", + "Recovery in maintenance mode" }, + { CORE_CHECKSTOP_LSU_REGFILE, "LSU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC", + "Forward Progress Error" }, + { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC", + "Hypervisor Resource error - core check stop" }, + { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC", + "Hang Recovery Failed (core check stop)" }, + { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC", + "Ambiguous Hang Detected (unknown source)" }, + { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC", + "Debug Trigger Error inject" }, + { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC", + "Hypervisor check stop via SPRC/SPRD" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown Core check stop.\n", level); + return; + } + + printk("%s CPU PIR: %08x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.pir)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_nx_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine", + "SHM invalid state error" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine", + "DMA invalid state error bit 15" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine", + "DMA invalid state error bit 16" }, + { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine", + "Channel 0 invalid state error" }, + { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine", + "Channel 1 invalid state error" }, + { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine", + "Channel 2 invalid state error" }, + { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine", + "Channel 3 invalid state error" }, + { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine", + "Channel 4 invalid state error" }, + { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine", + "Channel 5 invalid state error" }, + { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine", + "Channel 6 invalid state error" }, + { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine", + "Channel 7 invalid state error" }, + { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine", + "UE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine", + "SUE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface", + "CRB Kill ISN received while holding ISN with UE error" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown NX check stop.\n", level); + return; + } + + printk("%s NX checkstop on CHIP ID: %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + switch (hmi_evt->u.xstop_error.xstop_type) { + case CHECKSTOP_TYPE_CORE: + print_core_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_NX: + print_nx_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_UNKNOWN: + printk("%s Unknown Malfunction Alert.\n", level); + break; + } +} + static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) { const char *level, *sevstr, *error_info; @@ -95,6 +220,13 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + + if (hmi_evt->version < OpalHMIEvt_V2) + return; + + /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */ + if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT) + print_checkstop_reason(level, hmi_evt); } static void hmi_event_handler(struct work_struct *work) @@ -103,6 +235,8 @@ static void hmi_event_handler(struct work_struct *work) struct OpalHMIEvent *hmi_evt; struct OpalHmiEvtNode *msg_node; uint8_t disposition; + struct opal_msg msg; + int unrecoverable = 0; spin_lock_irqsave(&opal_hmi_evt_lock, flags); while (!list_empty(&opal_hmi_evt_list)) { @@ -118,14 +252,53 @@ static void hmi_event_handler(struct work_struct *work) /* * Check if HMI event has been recovered or not. If not - * then we can't continue, invoke panic. + * then kernel can't continue, we need to panic. + * But before we do that, display all the HMI event + * available on the list and set unrecoverable flag to 1. */ if (disposition != OpalHMI_DISPOSITION_RECOVERED) - panic("Unrecoverable HMI exception"); + unrecoverable = 1; spin_lock_irqsave(&opal_hmi_evt_lock, flags); } spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + if (unrecoverable) { + int ret; + + /* Pull all HMI events from OPAL before we panic. */ + while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) { + u32 type; + + type = be32_to_cpu(msg.msg_type); + + /* skip if not HMI event */ + if (type != OPAL_MSG_HMI_EVT) + continue; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&msg.params[0]; + print_hmi_event_info(hmi_evt); + } + + /* + * Unrecoverable HMI exception. We need to inform BMC/OCC + * about this error so that it can collect relevant data + * for error analysis before rebooting. + */ + ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, + "Unrecoverable HMI exception"); + if (ret == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported\n", + OPAL_REBOOT_PLATFORM_ERROR); + } + + /* + * Fall through and panic if opal_cec_reboot2() returns + * OPAL_UNSUPPORTED. + */ + panic("Unrecoverable HMI exception"); + } } static DECLARE_WORK(hmi_event_work, hmi_event_handler); @@ -170,7 +343,7 @@ static struct notifier_block opal_hmi_handler_nb = { .priority = 0, }; -static int __init opal_hmi_handler_init(void) +int __init opal_hmi_handler_init(void) { int ret; @@ -186,4 +359,3 @@ static int __init opal_hmi_handler_init(void) } return 0; } -machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c b/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c new file mode 100644 index 000000000..e505223b4 --- /dev/null +++ b/kernel/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -0,0 +1,266 @@ +/* + * This file implements an irqchip for OPAL events. Whenever there is + * an interrupt that is handled by OPAL we get passed a list of events + * that Linux needs to do something about. These basically look like + * interrupts to Linux so we implement an irqchip to handle them. + * + * Copyright Alistair Popple, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include <linux/bitops.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/kthread.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/irq_work.h> + +#include <asm/machdep.h> +#include <asm/opal.h> + +#include "powernv.h" + +/* Maximum number of events supported by OPAL firmware */ +#define MAX_NUM_EVENTS 64 + +struct opal_event_irqchip { + struct irq_chip irqchip; + struct irq_domain *domain; + unsigned long mask; +}; +static struct opal_event_irqchip opal_event_irqchip; + +static unsigned int opal_irq_count; +static unsigned int *opal_irqs; + +static void opal_handle_irq_work(struct irq_work *work); +static u64 last_outstanding_events; +static struct irq_work opal_event_irq_work = { + .func = opal_handle_irq_work, +}; + +void opal_handle_events(uint64_t events) +{ + int virq, hwirq = 0; + u64 mask = opal_event_irqchip.mask; + + if (!in_irq() && (events & mask)) { + last_outstanding_events = events; + irq_work_queue(&opal_event_irq_work); + return; + } + + while (events & mask) { + hwirq = fls64(events) - 1; + if (BIT_ULL(hwirq) & mask) { + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) + generic_handle_irq(virq); + } + events &= ~BIT_ULL(hwirq); + } +} + +static void opal_event_mask(struct irq_data *d) +{ + clear_bit(d->hwirq, &opal_event_irqchip.mask); +} + +static void opal_event_unmask(struct irq_data *d) +{ + __be64 events; + + set_bit(d->hwirq, &opal_event_irqchip.mask); + + opal_poll_events(&events); + last_outstanding_events = be64_to_cpu(events); + + /* + * We can't just handle the events now with opal_handle_events(). + * If we did we would deadlock when opal_event_unmask() is called from + * handle_level_irq() with the irq descriptor lock held, because + * calling opal_handle_events() would call generic_handle_irq() and + * then handle_level_irq() which would try to take the descriptor lock + * again. Instead queue the events for later. + */ + if (last_outstanding_events & opal_event_irqchip.mask) + /* Need to retrigger the interrupt */ + irq_work_queue(&opal_event_irq_work); +} + +static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * For now we only support level triggered events. The irq + * handler will be called continuously until the event has + * been cleared in OPAL. + */ + if (flow_type != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct opal_event_irqchip opal_event_irqchip = { + .irqchip = { + .name = "OPAL EVT", + .irq_mask = opal_event_mask, + .irq_unmask = opal_event_unmask, + .irq_set_type = opal_event_set_type, + }, + .mask = 0, +}; + +static int opal_event_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, &opal_event_irqchip); + irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip, + handle_level_irq); + + return 0; +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + opal_handle_events(be64_to_cpu(events)); + + return IRQ_HANDLED; +} + +static void opal_handle_irq_work(struct irq_work *work) +{ + opal_handle_events(last_outstanding_events); +} + +static int opal_event_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return irq_domain_get_of_node(h) == node; +} + +static int opal_event_xlate(struct irq_domain *h, struct device_node *np, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + + return 0; +} + +static const struct irq_domain_ops opal_event_domain_ops = { + .match = opal_event_match, + .map = opal_event_map, + .xlate = opal_event_xlate, +}; + +void opal_event_shutdown(void) +{ + unsigned int i; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; + } +} + +int __init opal_event_init(void) +{ + struct device_node *dn, *opal_node; + const __be32 *irqs; + int i, irqlen, rc = 0; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("opal: Node not found\n"); + return -ENODEV; + } + + /* If dn is NULL it means the domain won't be linked to a DT + * node so therefore irq_of_parse_and_map(...) wont work. But + * that shouldn't be problem because if we're running a + * version of skiboot that doesn't have the dn then the + * devices won't have the correct properties and will have to + * fall back to the legacy method (opal_event_request(...)) + * anyway. */ + dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event"); + opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS, + &opal_event_domain_ops, &opal_event_irqchip); + of_node_put(dn); + if (!opal_event_irqchip.domain) { + pr_warn("opal: Unable to create irq domain\n"); + rc = -ENOMEM; + goto out; + } + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + + /* Install interrupt handlers */ + opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } + +out: + of_node_put(opal_node); + return rc; +} +machine_arch_initcall(powernv, opal_event_init); + +/** + * opal_event_request(unsigned int opal_event_nr) - Request an event + * @opal_event_nr: the opal event number to request + * + * This routine can be used to find the linux virq number which can + * then be passed to request_irq to assign a handler for a particular + * opal event. This should only be used by legacy devices which don't + * have proper device tree bindings. Most devices should use + * irq_of_parse_and_map() instead. + */ +int opal_event_request(unsigned int opal_event_nr) +{ + if (WARN_ON_ONCE(!opal_event_irqchip.domain)) + return NO_IRQ; + + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); +} +EXPORT_SYMBOL(opal_event_request); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c b/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c new file mode 100644 index 000000000..6f1214d4d --- /dev/null +++ b/kernel/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -0,0 +1,75 @@ +/* + * kmsg dumper that ensures the OPAL console fully flushes panic messages + * + * Author: Russell Currey <ruscur@russell.cc> + * + * Copyright 2015 IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kmsg_dump.h> + +#include <asm/opal.h> +#include <asm/opal-api.h> + +/* + * Console output is controlled by OPAL firmware. The kernel regularly calls + * OPAL_POLL_EVENTS, which flushes some console output. In a panic state, + * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message + * may not be completely printed. This function does not actually dump the + * message, it just ensures that OPAL completely flushes the console buffer. + */ +static void force_opal_console_flush(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + int i; + int64_t ret; + + /* + * Outside of a panic context the pollers will continue to run, + * so we don't need to do any special flushing. + */ + if (reason != KMSG_DUMP_PANIC) + return; + + if (opal_check_token(OPAL_CONSOLE_FLUSH)) { + ret = opal_console_flush(0); + + if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) + return; + + /* Incrementally flush until there's nothing left */ + while (opal_console_flush(0) != OPAL_SUCCESS); + } else { + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function enough times to flush the buffer. We don't know + * how much output still needs to be flushed, but we can be + * generous since the kernel is in panic and doesn't need + * to do much else. + */ + printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n"); + for (i = 0; i < 1024; i++) { + opal_poll_events(NULL); + } + } +} + +static struct kmsg_dumper opal_kmsg_dumper = { + .dump = force_opal_console_flush +}; + +void __init opal_kmsg_init(void) +{ + int rc; + + /* Add our dumper to the list */ + rc = kmsg_dump_register(&opal_kmsg_dumper); + if (rc != 0) + pr_err("opal: kmsg_dump_register failed; returned %d\n", rc); +} diff --git a/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c b/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c index 43db2136d..00a29432b 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -144,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -machine_subsys_initcall(powernv, opal_mem_err_init); +machine_device_initcall(powernv, opal_mem_err_init); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-power.c b/kernel/arch/powerpc/platforms/powernv/opal-power.c index ac46c2c24..58dc33082 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-power.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-power.c @@ -9,9 +9,12 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "opal-power: " fmt + #include <linux/kernel.h> #include <linux/reboot.h> #include <linux/notifier.h> +#include <linux/of.h> #include <asm/opal.h> #include <asm/machdep.h> @@ -19,30 +22,116 @@ #define SOFT_OFF 0x00 #define SOFT_REBOOT 0x01 +/* Detect EPOW event */ +static bool detect_epow(void) +{ + u16 epow; + int i, rc; + __be16 epow_classes; + __be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0}; + + /* + * Check for EPOW event. Kernel sends supported EPOW classes info + * to OPAL. OPAL returns EPOW info along with classes present. + */ + epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX); + rc = opal_get_epow_status(opal_epow_status, &epow_classes); + if (rc != OPAL_SUCCESS) { + pr_err("Failed to get EPOW event information\n"); + return false; + } + + /* Look for EPOW events present */ + for (i = 0; i < be16_to_cpu(epow_classes); i++) { + epow = be16_to_cpu(opal_epow_status[i]); + + /* Filter events which do not need shutdown. */ + if (i == OPAL_SYSEPOW_POWER) + epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL | + OPAL_SYSPOWER_INCL); + if (epow) + return true; + } + + return false; +} + +/* Check for existing EPOW, DPO events */ +static bool poweroff_pending(void) +{ + int rc; + __be64 opal_dpo_timeout; + + /* Check for DPO event */ + rc = opal_get_dpo_status(&opal_dpo_timeout); + if (rc == OPAL_SUCCESS) { + pr_info("Existing DPO event detected.\n"); + return true; + } + + /* Check for EPOW event */ + if (detect_epow()) { + pr_info("Existing EPOW event detected.\n"); + return true; + } + + return false; +} + +/* OPAL power-control events notifier */ static int opal_power_control_event(struct notifier_block *nb, - unsigned long msg_type, void *msg) + unsigned long msg_type, void *msg) { - struct opal_msg *power_msg = msg; uint64_t type; - type = be64_to_cpu(power_msg->params[0]); - - switch (type) { - case SOFT_REBOOT: - pr_info("OPAL: reboot requested\n"); - orderly_reboot(); + switch (msg_type) { + case OPAL_MSG_EPOW: + if (detect_epow()) { + pr_info("EPOW msg received. Powering off system\n"); + orderly_poweroff(true); + } break; - case SOFT_OFF: - pr_info("OPAL: poweroff requested\n"); + case OPAL_MSG_DPO: + pr_info("DPO msg received. Powering off system\n"); orderly_poweroff(true); break; + case OPAL_MSG_SHUTDOWN: + type = be64_to_cpu(((struct opal_msg *)msg)->params[0]); + switch (type) { + case SOFT_REBOOT: + pr_info("Reboot requested\n"); + orderly_reboot(); + break; + case SOFT_OFF: + pr_info("Poweroff requested\n"); + orderly_poweroff(true); + break; + default: + pr_err("Unknown power-control type %llu\n", type); + } + break; default: - pr_err("OPAL: power control type unexpected %016llx\n", type); + pr_err("Unknown OPAL message type %lu\n", msg_type); } return 0; } +/* OPAL EPOW event notifier block */ +static struct notifier_block opal_epow_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +/* OPAL DPO event notifier block */ +static struct notifier_block opal_dpo_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +/* OPAL power-control event notifier block */ static struct notifier_block opal_power_control_nb = { .notifier_call = opal_power_control_event, .next = NULL, @@ -51,16 +140,40 @@ static struct notifier_block opal_power_control_nb = { static int __init opal_power_control_init(void) { - int ret; + int ret, supported = 0; + struct device_node *np; + /* Register OPAL power-control events notifier */ ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN, - &opal_power_control_nb); - if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); - return ret; + &opal_power_control_nb); + if (ret) + pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret); + + /* Determine OPAL EPOW, DPO support */ + np = of_find_node_by_path("/ibm,opal/epow"); + if (np) { + supported = of_device_is_compatible(np, "ibm,opal-v3-epow"); + of_node_put(np); } + if (!supported) + return 0; + pr_info("OPAL EPOW, DPO support detected.\n"); + + /* Register EPOW event notifier */ + ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb); + if (ret) + pr_err("Failed to register EPOW notifier, ret = %d\n", ret); + + /* Register DPO event notifier */ + ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb); + if (ret) + pr_err("Failed to register DPO notifier, ret = %d\n", ret); + + /* Check for any pending EPOW or DPO events. */ + if (poweroff_pending()) + orderly_poweroff(true); + return 0; } machine_subsys_initcall(powernv, opal_power_control_init); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-prd.c b/kernel/arch/powerpc/platforms/powernv/opal-prd.c new file mode 100644 index 000000000..4ece8e40d --- /dev/null +++ b/kernel/arch/powerpc/platforms/powernv/opal-prd.c @@ -0,0 +1,448 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * Copyright IBM Corporation 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "opal-prd: " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/miscdevice.h> +#include <linux/fs.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/poll.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/opal-prd.h> +#include <asm/opal.h> +#include <asm/io.h> +#include <asm/uaccess.h> + + +/** + * The msg member must be at the end of the struct, as it's followed by the + * message data. + */ +struct opal_prd_msg_queue_item { + struct list_head list; + struct opal_prd_msg_header msg; +}; + +static struct device_node *prd_node; +static LIST_HEAD(opal_prd_msg_queue); +static DEFINE_SPINLOCK(opal_prd_msg_queue_lock); +static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait); +static atomic_t prd_usage; + +static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) +{ + struct device_node *parent, *node; + bool found; + + if (addr + size < addr) + return false; + + parent = of_find_node_by_path("/reserved-memory"); + if (!parent) + return false; + + found = false; + + for_each_child_of_node(parent, node) { + uint64_t range_addr, range_size, range_end; + const __be32 *addrp; + const char *label; + + addrp = of_get_address(node, 0, &range_size, NULL); + + range_addr = of_read_number(addrp, 2); + range_end = range_addr + range_size; + + label = of_get_property(node, "ibm,prd-label", NULL); + + /* PRD ranges need a label */ + if (!label) + continue; + + if (range_end <= range_addr) + continue; + + if (addr >= range_addr && addr + size <= range_end) { + found = true; + of_node_put(node); + break; + } + } + + of_node_put(parent); + return found; +} + +static int opal_prd_open(struct inode *inode, struct file *file) +{ + /* + * Prevent multiple (separate) processes from concurrent interactions + * with the FW PRD channel + */ + if (atomic_xchg(&prd_usage, 1) == 1) + return -EBUSY; + + return 0; +} + +/* + * opal_prd_mmap - maps firmware-provided ranges into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + */ + +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t addr, size; + pgprot_t page_prot; + int rc; + + pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_flags); + + addr = vma->vm_pgoff << PAGE_SHIFT; + size = vma->vm_end - vma->vm_start; + + /* ensure we're mapping within one of the allowable ranges */ + if (!opal_prd_range_is_valid(addr, size)) + return -EINVAL; + + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); + + rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + page_prot); + + return rc; +} + +static bool opal_msg_queue_empty(void) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + ret = list_empty(&opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + return ret; +} + +static unsigned int opal_prd_poll(struct file *file, + struct poll_table_struct *wait) +{ + poll_wait(file, &opal_prd_msg_wait, wait); + + if (!opal_msg_queue_empty()) + return POLLIN | POLLRDNORM; + + return 0; +} + +static ssize_t opal_prd_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_queue_item *item; + unsigned long flags; + ssize_t size, err; + int rc; + + /* we need at least a header's worth of data */ + if (count < sizeof(item->msg)) + return -EINVAL; + + if (*ppos) + return -ESPIPE; + + item = NULL; + + for (;;) { + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + if (!list_empty(&opal_prd_msg_queue)) { + item = list_first_entry(&opal_prd_msg_queue, + struct opal_prd_msg_queue_item, list); + list_del(&item->list); + } + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + if (item) + break; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(opal_prd_msg_wait, + !opal_msg_queue_empty()); + if (rc) + return -EINTR; + } + + size = be16_to_cpu(item->msg.size); + if (size > count) { + err = -EINVAL; + goto err_requeue; + } + + rc = copy_to_user(buf, &item->msg, size); + if (rc) { + err = -EFAULT; + goto err_requeue; + } + + kfree(item); + + return size; + +err_requeue: + /* eep! re-queue at the head of the list */ + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + return err; +} + +static ssize_t opal_prd_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_header hdr; + ssize_t size; + void *msg; + int rc; + + size = sizeof(hdr); + + if (count < size) + return -EINVAL; + + /* grab the header */ + rc = copy_from_user(&hdr, buf, sizeof(hdr)); + if (rc) + return -EFAULT; + + size = be16_to_cpu(hdr.size); + + msg = kmalloc(size, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = copy_from_user(msg, buf, size); + if (rc) { + size = -EFAULT; + goto out_free; + } + + rc = opal_prd_msg(msg); + if (rc) { + pr_warn("write: opal_prd_msg returned %d\n", rc); + size = -EIO; + } + +out_free: + kfree(msg); + + return size; +} + +static int opal_prd_release(struct inode *inode, struct file *file) +{ + struct opal_prd_msg_header msg; + + msg.size = cpu_to_be16(sizeof(msg)); + msg.type = OPAL_PRD_MSG_TYPE_FINI; + + opal_prd_msg((struct opal_prd_msg *)&msg); + + atomic_xchg(&prd_usage, 0); + + return 0; +} + +static long opal_prd_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct opal_prd_info info; + struct opal_prd_scom scom; + int rc = 0; + + switch (cmd) { + case OPAL_PRD_GET_INFO: + memset(&info, 0, sizeof(info)); + info.version = OPAL_PRD_KERNEL_VERSION; + rc = copy_to_user((void __user *)param, &info, sizeof(info)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_READ: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_read(scom.chip, scom.addr, + (__be64 *)&scom.data); + scom.data = be64_to_cpu(scom.data); + pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_WRITE: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data); + pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + default: + rc = -EINVAL; + } + + return rc; +} + +static const struct file_operations opal_prd_fops = { + .open = opal_prd_open, + .mmap = opal_prd_mmap, + .poll = opal_prd_poll, + .read = opal_prd_read, + .write = opal_prd_write, + .unlocked_ioctl = opal_prd_ioctl, + .release = opal_prd_release, + .owner = THIS_MODULE, +}; + +static struct miscdevice opal_prd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "opal-prd", + .fops = &opal_prd_fops, +}; + +/* opal interface */ +static int opal_prd_msg_notifier(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_prd_msg_queue_item *item; + struct opal_prd_msg_header *hdr; + struct opal_msg *msg = _msg; + int msg_size, item_size; + unsigned long flags; + + if (msg_type != OPAL_MSG_PRD) + return 0; + + /* Calculate total size of the message and item we need to store. The + * 'size' field in the header includes the header itself. */ + hdr = (void *)msg->params; + msg_size = be16_to_cpu(hdr->size); + item_size = msg_size + sizeof(*item) - sizeof(item->msg); + + item = kzalloc(item_size, GFP_ATOMIC); + if (!item) + return -ENOMEM; + + memcpy(&item->msg, msg->params, msg_size); + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add_tail(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + wake_up_interruptible(&opal_prd_msg_wait); + + return 0; +} + +static struct notifier_block opal_prd_event_nb = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + +static int opal_prd_probe(struct platform_device *pdev) +{ + int rc; + + if (!pdev || !pdev->dev.of_node) + return -ENODEV; + + /* We should only have one prd driver instance per machine; ensure + * that we only get a valid probe on a single OF node. + */ + if (prd_node) + return -EBUSY; + + prd_node = pdev->dev.of_node; + + rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb); + if (rc) { + pr_err("Couldn't register event notifier\n"); + return rc; + } + + rc = misc_register(&opal_prd_dev); + if (rc) { + pr_err("failed to register miscdev\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, + &opal_prd_event_nb); + return rc; + } + + return 0; +} + +static int opal_prd_remove(struct platform_device *pdev) +{ + misc_deregister(&opal_prd_dev); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + return 0; +} + +static const struct of_device_id opal_prd_match[] = { + { .compatible = "ibm,opal-prd" }, + { }, +}; + +static struct platform_driver opal_prd_driver = { + .driver = { + .name = "opal-prd", + .owner = THIS_MODULE, + .of_match_table = opal_prd_match, + }, + .probe = opal_prd_probe, + .remove = opal_prd_remove, +}; + +module_platform_driver(opal_prd_driver); + +MODULE_DEVICE_TABLE(of, opal_prd_match); +MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver"); +MODULE_LICENSE("GPL"); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-sensor.c b/kernel/arch/powerpc/platforms/powernv/opal-sensor.c index 655250499..a06059df9 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-sensor.c @@ -77,7 +77,7 @@ out: } EXPORT_SYMBOL_GPL(opal_get_sensor_data); -static __init int opal_sensor_init(void) +int __init opal_sensor_init(void) { struct platform_device *pdev; struct device_node *sensor; @@ -93,4 +93,3 @@ static __init int opal_sensor_init(void) return PTR_ERR_OR_ZERO(pdev); } -machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c b/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c index 9d1acf22a..afe66c576 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/kernel/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) } ret = opal_get_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); @@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) ret = opal_set_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); @@ -162,10 +166,20 @@ void __init opal_sys_param_init(void) goto out; } + /* Some systems do not use sysparams; this is not an error */ + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) + goto out; + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); if (!sysparam_kobj) { pr_err("SYSPARAM: Failed to create sysparam kobject\n"); - goto out; + goto out_node_put; } /* Allocate big enough buffer for any get/set transactions */ @@ -176,30 +190,19 @@ void __init opal_sys_param_init(void) goto out_kobj_put; } - sysparam = of_find_node_by_path("/ibm,opal/sysparams"); - if (!sysparam) { - pr_err("SYSPARAM: Opal sysparam node not found\n"); - goto out_param_buf; - } - - if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { - pr_err("SYSPARAM: Opal sysparam node not compatible\n"); - goto out_node_put; - } - /* Number of parameters exposed through DT */ count = of_property_count_strings(sysparam, "param-name"); if (count < 0) { pr_err("SYSPARAM: No string found of property param-name in " "the node %s\n", sysparam->name); - goto out_node_put; + goto out_param_buf; } id = kzalloc(sizeof(*id) * count, GFP_KERNEL); if (!id) { pr_err("SYSPARAM: Failed to allocate memory to read parameter " "id\n"); - goto out_node_put; + goto out_param_buf; } size = kzalloc(sizeof(*size) * count, GFP_KERNEL); @@ -293,12 +296,12 @@ out_free_size: kfree(size); out_free_id: kfree(id); -out_node_put: - of_node_put(sysparam); out_param_buf: kfree(param_data_buf); out_kobj_put: kobject_put(sysparam_kobj); +out_node_put: + of_node_put(sysparam); out: return; } diff --git a/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S b/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S index a7ade94cd..e45b88a5d 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/kernel/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -202,6 +202,7 @@ OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); +OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2); OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); @@ -249,6 +250,7 @@ OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT); OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); +OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS); OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); @@ -283,6 +285,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); @@ -295,3 +298,7 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); +OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); +OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); diff --git a/kernel/arch/powerpc/platforms/powernv/opal.c b/kernel/arch/powerpc/platforms/powernv/opal.c index 2241565b0..ae29eaf85 100644 --- a/kernel/arch/powerpc/platforms/powernv/opal.c +++ b/kernel/arch/powerpc/platforms/powernv/opal.c @@ -53,13 +53,7 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -static unsigned int *opal_irqs; -static unsigned int opal_irq_count; -static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; -static DEFINE_SPINLOCK(opal_notifier_lock); -static uint64_t last_notified_mask = 0x0ul; -static atomic_t opal_notifier_hold = ATOMIC_INIT(0); static uint32_t opal_heartbeat; static void opal_reinit_cores(void) @@ -225,82 +219,6 @@ static int __init opal_register_exception_handlers(void) } machine_early_initcall(powernv, opal_register_exception_handlers); -int opal_notifier_register(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_register(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_register); - -int opal_notifier_unregister(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_unregister(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_unregister); - -static void opal_do_notifier(uint64_t events) -{ - unsigned long flags; - uint64_t changed_mask; - - if (atomic_read(&opal_notifier_hold)) - return; - - spin_lock_irqsave(&opal_notifier_lock, flags); - changed_mask = last_notified_mask ^ events; - last_notified_mask = events; - spin_unlock_irqrestore(&opal_notifier_lock, flags); - - /* - * We feed with the event bits and changed bits for - * enough information to the callback. - */ - atomic_notifier_call_chain(&opal_notifier_head, - events, (void *)changed_mask); -} - -void opal_notifier_update_evt(uint64_t evt_mask, - uint64_t evt_val) -{ - unsigned long flags; - - spin_lock_irqsave(&opal_notifier_lock, flags); - last_notified_mask &= ~evt_mask; - last_notified_mask |= evt_val; - spin_unlock_irqrestore(&opal_notifier_lock, flags); -} - -void opal_notifier_enable(void) -{ - int64_t rc; - __be64 evt = 0; - - atomic_set(&opal_notifier_hold, 0); - - /* Process pending events */ - rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) - opal_do_notifier(be64_to_cpu(evt)); -} - -void opal_notifier_disable(void) -{ - atomic_set(&opal_notifier_hold, 1); -} - /* * Opal message notifier based on message type. Allow subscribers to get * notified for specific messgae type. @@ -317,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, return atomic_notifier_chain_register( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_register); int opal_message_notifier_unregister(enum opal_msg_type msg_type, struct notifier_block *nb) @@ -324,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type, return atomic_notifier_chain_unregister( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); static void opal_message_do_notify(uint32_t msg_type, void *msg) { @@ -358,42 +278,42 @@ static void opal_handle_message(void) /* Sanity check */ if (type >= OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Unknown message type: %u\n", __func__, type); + pr_warn_once("%s: Unknown message type: %u\n", __func__, type); return; } opal_message_do_notify(type, (void *)&msg); } -static int opal_message_notify(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t opal_message_notify(int irq, void *data) { - if (events & OPAL_EVENT_MSG_PENDING) - opal_handle_message(); - return 0; + opal_handle_message(); + return IRQ_HANDLED; } -static struct notifier_block opal_message_nb = { - .notifier_call = opal_message_notify, - .next = NULL, - .priority = 0, -}; - static int __init opal_message_init(void) { - int ret, i; + int ret, i, irq; for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); - ret = opal_notifier_register(&opal_message_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + ret = request_irq(irq, opal_message_notify, + IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", + pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, ret); return ret; } + return 0; } -machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -521,6 +441,7 @@ static int opal_recover_mce(struct pt_regs *regs, int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; + int ret; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return 0; @@ -535,6 +456,43 @@ int opal_machine_check(struct pt_regs *regs) if (opal_recover_mce(regs, &evt)) return 1; + + /* + * Unrecovered machine check, we are heading to panic path. + * + * We may have hit this MCE in very early stage of kernel + * initialization even before opal-prd has started running. If + * this is the case then this MCE error may go un-noticed or + * un-analyzed if we go down panic path. We need to inform + * BMC/OCC about this error so that they can collect relevant + * data for error analysis before rebooting. + * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so. + * This function may not return on BMC based system. + */ + ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, + "Unrecoverable Machine Check exception"); + if (ret == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported\n", + OPAL_REBOOT_PLATFORM_ERROR); + } + + /* + * We reached here. There can be three possibilities: + * 1. We are running on a firmware level that do not support + * opal_cec_reboot2() + * 2. We are running on a firmware level that do not support + * OPAL_REBOOT_PLATFORM_ERROR reboot type. + * 3. We are running on FSP based system that does not need opal + * to trigger checkstop explicitly for error analysis. The FSP + * PRD component would have already got notified about this + * error through other channels. + * + * If hardware marked this as an unrecoverable MCE, we are + * going to panic anyway. Even if it didn't, it's not safe to + * continue at this point, so we should explicitly panic. + */ + + panic("PowerNV Unrecovered Machine Check"); return 0; } @@ -573,7 +531,7 @@ int opal_handle_hmi_exception(struct pt_regs *regs) local_paca->hmi_event_available = 0; rc = opal_poll_events(&evt); if (rc == OPAL_SUCCESS && evt) - opal_do_notifier(be64_to_cpu(evt)); + opal_handle_events(be64_to_cpu(evt)); return 1; } @@ -610,17 +568,6 @@ out: return !!recover_addr; } -static irqreturn_t opal_interrupt(int irq, void *data) -{ - __be64 events; - - opal_handle_interrupt(virq_to_hw(irq), &events); - - opal_do_notifier(be64_to_cpu(events)); - - return IRQ_HANDLED; -} - static int opal_sysfs_init(void) { opal_kobj = kobject_create_and_add("opal", firmware_kobj); @@ -693,21 +640,13 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_flash_init(struct device_node *opal_node) +static void opal_pdev_init(struct device_node *opal_node, + const char *compatible) { struct device_node *np; for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-flash")) - of_platform_device_create(np, NULL, NULL); -} - -static void opal_ipmi_init(struct device_node *opal_node) -{ - struct device_node *np; - - for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-ipmi")) + if (of_device_is_compatible(np, compatible)) of_platform_device_create(np, NULL, NULL); } @@ -719,52 +658,15 @@ static void opal_i2c_create_devs(void) of_platform_device_create(np, NULL, NULL); } -static void __init opal_irq_init(struct device_node *dn) -{ - const __be32 *irqs; - int i, irqlen; - - /* Get interrupt property */ - irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - opal_irq_count = irqs ? (irqlen / 4) : 0; - pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); - if (!opal_irq_count) - return; - - /* Install interrupt handlers */ - opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); - for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { - unsigned int irq, virq; - int rc; - - /* Get hardware and virtual IRQ */ - irq = be32_to_cpup(irqs); - virq = irq_create_mapping(NULL, irq); - if (virq == NO_IRQ) { - pr_warn("Failed to map irq 0x%x\n", irq); - continue; - } - - /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); - if (rc) { - irq_dispose_mapping(virq); - pr_warn("Error %d requesting irq %d (0x%x)\n", - rc, virq, irq); - continue; - } - - /* Cache IRQ */ - opal_irqs[i] = virq; - } -} - static int kopald(void *unused) { + __be64 events; + set_freezable(); do { try_to_freeze(); - opal_poll_events(NULL); + opal_poll_events(&events); + opal_handle_events(be64_to_cpu(events)); msleep_interruptible(opal_heartbeat); } while (!kthread_should_stop()); @@ -784,7 +686,7 @@ static void opal_init_heartbeat(void) static int __init opal_init(void) { - struct device_node *np, *consoles; + struct device_node *np, *consoles, *leds; int rc; opal_node = of_find_node_by_path("/ibm,opal"); @@ -807,14 +709,30 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Initialise OPAL messaging system */ + opal_message_init(); + + /* Initialise OPAL asynchronous completion interface */ + opal_async_comp_init(); + + /* Initialise OPAL sensor interface */ + opal_sensor_init(); + + /* Initialise OPAL hypervisor maintainence interrupt handling */ + opal_hmi_handler_init(); + /* Create i2c platform devices */ opal_i2c_create_devs(); /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); - /* Find all OPAL interrupts and request them */ - opal_irq_init(opal_node); + /* Create leds platform devices */ + leds = of_find_node_by_path("/ibm,opal/leds"); + if (leds) { + of_platform_device_create(leds, "opal_leds", NULL); + of_node_put(leds); + } /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); @@ -835,10 +753,13 @@ static int __init opal_init(void) opal_msglog_init(); } - /* Initialize OPAL IPMI backend */ - opal_ipmi_init(opal_node); + /* Initialize platform devices: IPMI backend, PRD & flash interface */ + opal_pdev_init(opal_node, "ibm,opal-ipmi"); + opal_pdev_init(opal_node, "ibm,opal-flash"); + opal_pdev_init(opal_node, "ibm,opal-prd"); - opal_flash_init(opal_node); + /* Initialise OPAL kmsg dumper for flushing console on panic */ + opal_kmsg_init(); return 0; } @@ -846,15 +767,9 @@ machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { - unsigned int i; long rc = OPAL_BUSY; - /* First free interrupts, which will also mask them */ - for (i = 0; i < opal_irq_count; i++) { - if (opal_irqs[i]) - free_irq(opal_irqs[i], NULL); - opal_irqs[i] = 0; - } + opal_event_shutdown(); /* * Then sync with OPAL which ensure anything that can @@ -876,11 +791,14 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_xscom_read); +EXPORT_SYMBOL_GPL(opal_xscom_write); EXPORT_SYMBOL_GPL(opal_ipmi_send); EXPORT_SYMBOL_GPL(opal_ipmi_recv); EXPORT_SYMBOL_GPL(opal_flash_read); EXPORT_SYMBOL_GPL(opal_flash_write); EXPORT_SYMBOL_GPL(opal_flash_erase); +EXPORT_SYMBOL_GPL(opal_prd_msg); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, @@ -954,6 +872,7 @@ int opal_error_code(int rc) case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; case OPAL_BUSY_EVENT: return -EBUSY; case OPAL_NO_MEM: return -ENOMEM; + case OPAL_PERMISSION: return -EPERM; case OPAL_UNSUPPORTED: return -EIO; case OPAL_HARDWARE: return -EIO; @@ -970,3 +889,6 @@ EXPORT_SYMBOL_GPL(opal_rtc_write); EXPORT_SYMBOL_GPL(opal_tpo_read); EXPORT_SYMBOL_GPL(opal_tpo_write); EXPORT_SYMBOL_GPL(opal_i2c_request); +/* Export these symbols for PowerNV LED class driver */ +EXPORT_SYMBOL_GPL(opal_leds_get_ind); +EXPORT_SYMBOL_GPL(opal_leds_set_ind); diff --git a/kernel/arch/powerpc/platforms/powernv/pci-ioda.c b/kernel/arch/powerpc/platforms/powernv/pci-ioda.c index f8bc950ef..e40d07146 100644 --- a/kernel/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/kernel/arch/powerpc/platforms/powernv/pci-ioda.c @@ -23,6 +23,9 @@ #include <linux/io.h> #include <linux/msi.h> #include <linux/memblock.h> +#include <linux/iommu.h> +#include <linux/rculist.h> +#include <linux/sizes.h> #include <asm/sections.h> #include <asm/io.h> @@ -38,8 +41,9 @@ #include <asm/debug.h> #include <asm/firmware.h> #include <asm/pnv-pci.h> +#include <asm/mmzone.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "powernv.h" #include "pci.h" @@ -47,6 +51,11 @@ /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 + +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); + static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { @@ -131,11 +140,9 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) return; } - if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { - pr_warn("%s: PE %d was assigned on PHB#%x\n", - __func__, pe_no, phb->hose->global_number); - return; - } + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) + pr_debug("%s: PE %d was reserved on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); phb->ioda.pe_array[pe_no].phb = phb; phb->ioda.pe_array[pe_no].pe_number = pe_no; @@ -222,61 +229,60 @@ fail: return -EIO; } -static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) +static void pnv_ioda2_reserve_dev_m64_pe(struct pci_dev *pdev, + unsigned long *pe_bitmap) { - resource_size_t sgsz = phb->ioda.m64_segsize; - struct pci_dev *pdev; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct resource *r; - int base, step, i; - - /* - * Root bus always has full M64 range and root port has - * M64 range used in reality. So we're checking root port - * instead of root bus. - */ - list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { - for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { - r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; - if (!r->parent || - !pnv_pci_is_mem_pref_64(r->flags)) - continue; + resource_size_t base, sgsz, start, end; + int segno, i; + + base = phb->ioda.m64_base; + sgsz = phb->ioda.m64_segsize; + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + r = &pdev->resource[i]; + if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) + continue; - base = (r->start - phb->ioda.m64_base) / sgsz; - for (step = 0; step < resource_size(r) / sgsz; step++) - pnv_ioda_reserve_pe(phb, base + step); + start = _ALIGN_DOWN(r->start - base, sgsz); + end = _ALIGN_UP(r->end - base, sgsz); + for (segno = start / sgsz; segno < end / sgsz; segno++) { + if (pe_bitmap) + set_bit(segno, pe_bitmap); + else + pnv_ioda_reserve_pe(phb, segno); } } } -static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, - struct pci_bus *bus, int all) +static void pnv_ioda2_reserve_m64_pe(struct pci_bus *bus, + unsigned long *pe_bitmap, + bool all) { - resource_size_t segsz = phb->ioda.m64_segsize; struct pci_dev *pdev; - struct resource *r; + + list_for_each_entry(pdev, &bus->devices, bus_list) { + pnv_ioda2_reserve_dev_m64_pe(pdev, pe_bitmap); + + if (all && pdev->subordinate) + pnv_ioda2_reserve_m64_pe(pdev->subordinate, + pe_bitmap, all); + } +} + +static int pnv_ioda2_pick_m64_pe(struct pci_bus *bus, bool all) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *master_pe, *pe; unsigned long size, *pe_alloc; - bool found; - int start, i, j; + int i; /* Root bus shouldn't use M64 */ if (pci_is_root_bus(bus)) return IODA_INVALID_PE; - /* We support only one M64 window on each bus */ - found = false; - pci_bus_for_each_resource(bus, r, i) { - if (r && r->parent && - pnv_pci_is_mem_pref_64(r->flags)) { - found = true; - break; - } - } - - /* No M64 window found ? */ - if (!found) - return IODA_INVALID_PE; - /* Allocate bitmap */ size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); pe_alloc = kzalloc(size, GFP_KERNEL); @@ -286,35 +292,8 @@ static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, return IODA_INVALID_PE; } - /* - * Figure out reserved PE numbers by the PE - * the its child PEs. - */ - start = (r->start - phb->ioda.m64_base) / segsz; - for (i = 0; i < resource_size(r) / segsz; i++) - set_bit(start + i, pe_alloc); - - if (all) - goto done; - - /* - * If the PE doesn't cover all subordinate buses, - * we need subtract from reserved PEs for children. - */ - list_for_each_entry(pdev, &bus->devices, bus_list) { - if (!pdev->subordinate) - continue; - - pci_bus_for_each_resource(pdev->subordinate, r, i) { - if (!r || !r->parent || - !pnv_pci_is_mem_pref_64(r->flags)) - continue; - - start = (r->start - phb->ioda.m64_base) / segsz; - for (j = 0; j < resource_size(r) / segsz ; j++) - clear_bit(start + j, pe_alloc); - } - } + /* Figure out reserved PE numbers by the PE */ + pnv_ioda2_reserve_m64_pe(bus, pe_alloc, all); /* * the current bus might not own M64 window and that's all @@ -330,7 +309,6 @@ static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb, * Figure out the master PE and put all slave PEs to master * PE's list to form compound PE. */ -done: master_pe = NULL; i = -1; while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < @@ -644,7 +622,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, pdev = pe->pdev->bus->self; #ifdef CONFIG_PCI_IOV else if (pe->flags & PNV_IODA_PE_VF) - pdev = pe->parent_dev->bus->self; + pdev = pe->parent_dev; #endif /* CONFIG_PCI_IOV */ while (pdev) { struct pci_dn *pdn = pci_get_pdn(pdev); @@ -723,7 +701,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) parent = parent->bus->self; } - opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number, + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); /* Disassociate PE in PELT */ @@ -937,8 +915,9 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) res2 = *res; res->start += size * offset; - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", - i, &res2, res, num_vfs, offset); + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); pci_update_resource(dev, i + PCI_IOV_RESOURCES); } return 0; @@ -1041,7 +1020,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) * subordinate PCI devices and buses. The second type of PE is normally * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. */ -static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) +static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) { struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; @@ -1050,7 +1029,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) /* Check if PE is determined by M64 */ if (phb->pick_m64_pe) - pe_num = phb->pick_m64_pe(phb, bus, all); + pe_num = phb->pick_m64_pe(bus, all); /* The PE number isn't pinned by M64 */ if (pe_num == IODA_INVALID_PE) @@ -1086,10 +1065,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) return; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Associate it with all child devices */ pnv_ioda_setup_same_PE(bus, pe); @@ -1112,12 +1087,12 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus) { struct pci_dev *dev; - pnv_ioda_setup_bus_PE(bus, 0); + pnv_ioda_setup_bus_PE(bus, false); list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) { if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) - pnv_ioda_setup_bus_PE(dev->subordinate, 1); + pnv_ioda_setup_bus_PE(dev->subordinate, true); else pnv_ioda_setup_PEs(dev->subordinate); } @@ -1142,7 +1117,7 @@ static void pnv_pci_ioda_setup_PEs(void) /* M64 layout might affect PE allocation */ if (phb->reserve_m64_pe) - phb->reserve_m64_pe(phb); + phb->reserve_m64_pe(hose->bus, NULL, true); pnv_ioda_setup_PEs(hose->bus); } @@ -1283,36 +1258,27 @@ m64_failed: return -EBUSY; } +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num); +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); + static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) { - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; struct iommu_table *tbl; - unsigned long addr; int64_t rc; - bus = dev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - tbl = pe->tce32_table; - addr = tbl->it_base; - - opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - 0, 0x1000); - - rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, - pe->pe_number, - (pe->pe_number << 1) + 1, - pe->tce_bypass_base, - 0); + tbl = pe->table_group.tables[0]; + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + pnv_pci_ioda2_set_bypass(pe, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + BUG_ON(pe->table_group.group); + } + pnv_pci_ioda2_table_free_pages(tbl); iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); - free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->tce32_table = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1460,10 +1426,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) continue; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Put PE to the list */ mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1598,12 +1560,20 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); + set_dma_offset(&pdev->dev, pe->tce_bypass_base); + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); + /* + * Note: iommu_add_device() will fail here as + * for physical PE: the device is already added by now; + * for virtual PE: sysfs entries are not ready yet and + * tce_iommu_bus_notifier will add the device to a group later. + */ } -static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, - struct pci_dev *pdev, u64 dma_mask) +static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; uint64_t top; @@ -1621,19 +1591,18 @@ static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, if (bypass) { dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); set_dma_ops(&pdev->dev, &dma_direct_ops); - set_dma_offset(&pdev->dev, pe->tce_bypass_base); } else { dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, pe->tce32_table); } *pdev->dev.dma_mask = dma_mask; return 0; } -static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, - struct pci_dev *pdev) +static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; u64 end, mask; @@ -1654,36 +1623,37 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus, - bool add_to_iommu_group) + struct pci_bus *bus) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - if (add_to_iommu_group) - set_iommu_table_base_and_group(&dev->dev, - pe->tce32_table); - else - set_iommu_table_base(&dev->dev, pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); + set_dma_offset(&dev->dev, pe->tce_bypass_base); + iommu_add_device(&dev->dev); - if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate, - add_to_iommu_group); + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); } } -static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, + struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; unsigned long start, end, inc; const unsigned shift = tbl->it_page_shift; - start = __pa(startp); - end = __pa(endp); + start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); + end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset + + npages - 1); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { @@ -1719,26 +1689,79 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, */ } -static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); + + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +#ifdef CONFIG_IOMMU_API +static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + +static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); +} + +static struct iommu_table_ops pnv_ioda1_iommu_ops = { + .set = pnv_ioda1_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda1_tce_xchg, +#endif + .clear = pnv_ioda1_tce_free, + .get = pnv_tce_get, +}; + +static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); + struct pnv_phb *phb = pe->phb; + + if (!phb->ioda.tce_inval_reg) + return; + + mb(); /* Ensure above stores are visible */ + __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); +} + +static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, + __be64 __iomem *invalidate, unsigned shift, + unsigned long index, unsigned long npages) { unsigned long start, end, inc; - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; - const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ start = 0x2ull << 60; - start |= (pe->pe_number & 0xFF); + start |= (pe_number & 0xFF); end = start; /* Figure out the start, end and step */ - inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << shift); - inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << shift); + start |= (index << shift); + end |= ((index + npages - 1) << shift); inc = (0x1ull << shift); mb(); @@ -1751,25 +1774,83 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, } } -void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = tbl->data; - struct pnv_phb *phb = pe->phb; + struct iommu_table_group_link *tgl; - if (phb->type == PNV_PHB_IODA1) - pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); - else - pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl->table_group, + struct pnv_ioda_pe, table_group); + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; + + pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, + invalidate, tbl->it_page_shift, + index, npages); + } } +static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); + + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +#ifdef CONFIG_IOMMU_API +static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + +static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); +} + +static void pnv_ioda2_table_free(struct iommu_table *tbl) +{ + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, "pnv"); +} + +static struct iommu_table_ops pnv_ioda2_iommu_ops = { + .set = pnv_ioda2_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda2_tce_xchg, +#endif + .clear = pnv_ioda2_tce_free, + .get = pnv_tce_get, + .free = pnv_ioda2_table_free, +}; + static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe, unsigned int base, unsigned int segs) { struct page *tce_mem = NULL; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int i; int64_t rc; @@ -1783,6 +1864,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + tbl = pnv_pci_table_alloc(phb->hose->node); + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); + /* Grab a 32-bit TCE table */ pe->tce32_seg = base; pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", @@ -1817,39 +1903,30 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. - */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); + if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); - } + + tbl->it_ops = &pnv_ioda1_iommu_ops; + pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; + pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + set_iommu_table_base(&pe->pdev->dev, tbl); + iommu_add_device(&pe->pdev->dev); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + pnv_ioda_setup_bus_dma(pe, pe->pbus); return; fail: @@ -1858,11 +1935,53 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } +} + +static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + int64_t rc; + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; + const __u64 win_size = tbl->it_size << tbl->it_page_shift; + + pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, + start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); + + /* + * Map TCE table through TVT. The TVE index is the PE number + * shifted by 1 bit for 32-bits DMA space. + */ + rc = opal_pci_map_pe_dma_window(phb->opal_id, + pe->pe_number, + (pe->pe_number << 1) + num, + tbl->it_indirect_levels + 1, + __pa(tbl->it_base), + size << 3, + IOMMU_PAGE_SIZE(tbl)); + if (rc) { + pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); + return rc; + } + + pnv_pci_link_table_and_group(phb->hose->node, num, + tbl, &pe->table_group); + pnv_pci_ioda2_tce_invalidate_entire(pe); + + return 0; } -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { - struct pnv_ioda_pe *pe = tbl->data; uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1882,17 +2001,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) window_id, pe->tce_bypass_base, 0); - - /* - * EEH needs the mapping between IOMMU table and group - * of those VFIO/KVM pass-through devices. We can postpone - * resetting DMA ops until the DMA mask is configured in - * host side. - */ - if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); - else - pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -1900,106 +2008,378 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) pe->tce_bypass_enabled = enable; } -static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl); + +static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) { - /* TVE #1 is selected by PCI address bit 59 */ - pe->tce_bypass_base = 1ull << 59; + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + int nid = pe->phb->hose->node; + __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start; + long ret; + struct iommu_table *tbl; + + tbl = pnv_pci_table_alloc(nid); + if (!tbl) + return -ENOMEM; + + ret = pnv_pci_ioda2_table_alloc_pages(nid, + bus_offset, page_shift, window_size, + levels, tbl); + if (ret) { + iommu_free_table(tbl, "pnv"); + return ret; + } + + tbl->it_ops = &pnv_ioda2_iommu_ops; + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - /* Install set_bypass callback for VFIO */ - pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass; + *ptbl = tbl; - /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(pe->tce32_table, true); + return 0; } -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = NULL; + long rc; + + /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. + */ + const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, + IOMMU_PAGE_SHIFT_4K, + window_size, + POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", + rc); + return rc; + } + + iommu_init_table(tbl, pe->phb->hose->node); + + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (rc) { + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", + rc); + pnv_ioda2_table_free(tbl); + return rc; + } + + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_set_bypass(pe, true); + + /* OPAL variant of PHB3 invalidated TCEs */ + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + if (pe->flags & PNV_IODA_PE_DEV) + set_iommu_table_base(&pe->pdev->dev, tbl); + + return 0; +} + +#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} +#endif + +#ifdef CONFIG_IOMMU_API +static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long bytes = 0; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = entries_shift + 3; + unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift); + unsigned long direct_table_size; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || + (window_size > memory_hotplug_max()) || + !is_power_of_2(window_size)) + return 0; + + /* Calculate a direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + table_shift = entries_shift + 3; + table_shift = max_t(unsigned, table_shift, PAGE_SHIFT); + direct_table_size = 1UL << table_shift; + + for ( ; levels; --levels) { + bytes += _ALIGN_UP(tce_table_size, direct_table_size); + + tce_table_size /= direct_table_size; + tce_table_size <<= 3; + tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size); + } + + return bytes; +} + +static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ + struct iommu_table *tbl = pe->table_group.tables[0]; + + pnv_pci_ioda2_set_bypass(pe, false); + pnv_pci_ioda2_unset_window(&pe->table_group, 0); + pnv_ioda2_table_free(tbl); +} + +static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + + pnv_pci_ioda2_setup_default_config(pe); +} + +static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .get_table_size = pnv_pci_ioda2_get_table_size, + .create_table = pnv_pci_ioda2_create_table, + .set_window = pnv_pci_ioda2_set_window, + .unset_window = pnv_pci_ioda2_unset_window, + .take_ownership = pnv_ioda2_take_ownership, + .release_ownership = pnv_ioda2_release_ownership, +}; +#endif + +static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) { - struct page *tce_mem = NULL; - void *addr; const __be64 *swinvp; - struct iommu_table *tbl; - unsigned int tce_table_size, end; - int64_t rc; - /* We shouldn't already have a 32-bit DMA associated */ - if (WARN_ON(pe->tce32_seg >= 0)) + /* OPAL variant of PHB3 invalidated TCEs */ + swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); + if (!swinvp) return; - /* The PE will reserve all possible 32-bits space */ - pe->tce32_seg = 0; - end = (1 << ilog2(phb->ioda.m32_pci_base)); - tce_table_size = (end / 0x1000) * 8; - pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", - end); + phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp); + phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); +} - /* Allocate TCE table */ - tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, - get_order(tce_table_size)); +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, + unsigned levels, unsigned long limit, + unsigned long *current_offset, unsigned long *total_allocated) +{ + struct page *tce_mem = NULL; + __be64 *addr, *tmp; + unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; + unsigned long allocated = 1UL << (order + PAGE_SHIFT); + unsigned entries = 1UL << (shift - 3); + long i; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); if (!tce_mem) { - pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); - goto fail; + pr_err("Failed to allocate a TCE memory, order=%d\n", order); + return NULL; } addr = page_address(tce_mem); - memset(addr, 0, tce_table_size); + memset(addr, 0, allocated); + *total_allocated += allocated; + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset, total_allocated); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } + + return addr; +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level); + +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl) +{ + void *addr; + unsigned long offset = 0, level_shift, total_allocated = 0; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + + if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) + return -EINVAL; + + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset, &total_allocated); + + /* addr==NULL means that the first level allocation failed */ + if (!addr) + return -ENOMEM; /* - * Map TCE table through TVT. The TVE index is the PE number - * shifted by 1 bit for 32-bits DMA space. + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. */ - rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - tce_table_size, 0x1000); - if (rc) { - pe_err(pe, "Failed to configure 32-bit TCE table," - " err %ld\n", rc); - goto fail; + if (offset < tce_table_size) { + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + return -ENOMEM; } /* Setup linux iommu table */ - tbl = pe->tce32_table; - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - IOMMU_PAGE_SHIFT_4K); + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; + tbl->it_allocated_size = total_allocated; - /* OPAL variant of PHB3 invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. - */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", + window_size, tce_table_size, bus_offset); + + return 0; +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (level) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + level - 1); + } } - iommu_init_table(tbl, phb->hose->node); - if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } - - /* Also create a bypass window */ - if (!pnv_iommu_bypass_disabled) - pnv_pci_ioda2_setup_bypass_pe(phb, pe); + free_pages(addr_ul, get_order(size << 3)); +} - return; -fail: - if (pe->tce32_seg >= 0) - pe->tce32_seg = -1; - if (tce_mem) - __free_pages(tce_mem, get_order(tce_table_size)); +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + + if (!tbl->it_size) + return; + + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ + int64_t rc; + + /* We shouldn't already have a 32-bit DMA associated */ + if (WARN_ON(pe->tce32_seg >= 0)) + return; + + /* TVE #1 is selected by PCI address bit 59 */ + pe->tce_bypass_base = 1ull << 59; + + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); + + /* The PE will reserve all possible 32-bits space */ + pe->tce32_seg = 0; + pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", + phb->ioda.m32_pci_base); + + /* Setup linux iommu table */ + pe->table_group.tce32_start = 0; + pe->table_group.tce32_size = phb->ioda.m32_pci_base; + pe->table_group.max_dynamic_windows_supported = + IOMMU_TABLE_GROUP_MAX_TABLES; + pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; + pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; +#endif + + rc = pnv_pci_ioda2_setup_default_config(pe); + if (rc) { + if (pe->tce32_seg >= 0) + pe->tce32_seg = -1; + return; + } + + if (pe->flags & PNV_IODA_PE_DEV) + iommu_add_device(&pe->pdev->dev); + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + pnv_ioda_setup_bus_dma(pe, pe->pbus); } static void pnv_ioda_setup_dma(struct pnv_phb *phb) @@ -2024,6 +2404,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pr_info("PCI: %d PE# for a total weight of %d\n", phb->ioda.dma_pe_count, phb->ioda.dma_weight); + pnv_pci_ioda_setup_opal_tce_kill(phb); + /* Walk our PE list and configure their DMA segments, hand them * out one base segment plus any residual segments based on * weight @@ -2642,12 +3024,29 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } -static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { + struct pnv_phb *phb = hose->private_data; + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } +static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = pnv_pci_dma_bus_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, + .shutdown = pnv_pci_ioda_shutdown, +}; + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { @@ -2791,11 +3190,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; - phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; - phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; - - /* Setup shutdown function for kexec */ - phb->shutdown = pnv_pci_ioda_shutdown; /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); @@ -2808,10 +3202,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * the child P2P bridges) can form individual PE. */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook; - pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment; - pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus; - hose->controller_ops = pnv_pci_controller_ops; + hose->controller_ops = pnv_pci_ioda_controller_ops; #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; diff --git a/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 4729ca793..f2bdfea3b 100644 --- a/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/kernel/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -83,18 +83,42 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } #endif /* CONFIG_PCI_MSI */ +static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { + .set = pnv_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_tce_xchg, +#endif + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) { - iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); - iommu_register_group(&phb->p5ioc2.iommu_table, + struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; + + if (!tbl->it_map) { + tbl->it_ops = &pnv_p5ioc2_iommu_ops; + iommu_init_table(tbl, phb->hose->node); + iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + pnv_pci_link_table_and_group(phb->hose->node, 0, + tbl, &phb->p5ioc2.table_group); } - set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base(&pdev->dev, tbl); + iommu_add_device(&pdev->dev); } +static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif +}; + static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { @@ -103,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, u64 phb_id; int64_t rc; static int primary = 1; + struct iommu_table_group *table_group; + struct iommu_table *tbl; pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); @@ -133,7 +159,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_controller_ops; + phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; @@ -172,6 +198,15 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, tce_mem, tce_size, 0, IOMMU_PAGE_SHIFT_4K); + /* + * We do not allocate iommu_table as we do not support + * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() + * should not be called for phb->p5ioc2.table_group.tables[0] ever. + */ + tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; + table_group = &phb->p5ioc2.table_group; + table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; + table_group->tce32_size = tbl->it_size << tbl->it_page_shift; } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/kernel/arch/powerpc/platforms/powernv/pci.c b/kernel/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e..ad8c3f4a5 100644 --- a/kernel/arch/powerpc/platforms/powernv/pci.c +++ b/kernel/arch/powerpc/platforms/powernv/pci.c @@ -45,7 +45,7 @@ //#define cfg_dbg(fmt...) printk(fmt) #ifdef CONFIG_PCI_MSI -static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -61,7 +61,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (pdev->no_64bit_msi && !phb->msi32_support) return -ENODEV; - list_for_each_entry(entry, &pdev->msi_list, list) { + for_each_pci_msi_entry(entry, pdev) { if (!entry->msi_attrib.is_64 && !phb->msi32_support) { pr_warn("%s: Supports only 64-bit MSIs\n", pci_name(pdev)); @@ -94,22 +94,23 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -static void pnv_teardown_msi_irqs(struct pci_dev *pdev) +void pnv_teardown_msi_irqs(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; - list_for_each_entry(entry, &pdev->msi_list, list) { + for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ @@ -572,80 +573,158 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; -static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs, bool rm) +static __be64 *pnv_tce(struct iommu_table *tbl, long idx) { - u64 proto_tce; - __be64 *tcep, *tces; - u64 rpn; + __be64 *tmp = ((__be64 *)tbl->it_base); + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } - proto_tce = TCE_PCI_READ; // Read allowed + return tmp + idx; +} - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + u64 proto_tce = iommu_direction_to_tce_perm(direction); + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> tbl->it_page_shift; + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; - while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | - (rpn++ << tbl->it_page_shift)); + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; - /* Some implementations won't cache invalid TCEs and thus may not - * need that flush. We'll probably turn it_type into a bit mask - * of flags if that becomes the case - */ - if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); + *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); + } return 0; } -static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) { - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, - false); + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); + *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; } +#endif -static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, - bool rm) +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep, *tces; + long i; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; - while (npages--) - *(tcep++) = cpu_to_be64(0); + *(pnv_tce(tbl, idx)) = cpu_to_be64(0); + } +} - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + return *(pnv_tce(tbl, index - tbl->it_offset)); } -static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) +struct iommu_table *pnv_pci_table_alloc(int nid) { - pnv_tce_free(tbl, index, npages, false); + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + + return tbl; } -static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) { - return ((u64 *)tbl->it_base)[index - tbl->it_offset]; + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; } -static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) +static void pnv_iommu_table_group_link_free(struct rcu_head *head) { - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true); + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); } -static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) { - pnv_tce_free(tbl, index, npages, true); + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); } void pnv_pci_setup_iommu_table(struct iommu_table *tbl, @@ -662,7 +741,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } -static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) +void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -689,37 +768,33 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } -int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +void pnv_pci_dma_bus_setup(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pci_controller *hose = bus->sysdata; struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; - if (phb && phb->dma_set_mask) - return phb->dma_set_mask(phb, pdev, dma_mask); - return __dma_set_mask(&pdev->dev, dma_mask); -} - -u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; - if (phb && phb->dma_get_required_mask) - return phb->dma_get_required_mask(phb, pdev); + if (!pe->pbus) + continue; - return __dma_get_required_mask(&pdev->dev); + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } } void pnv_pci_shutdown(void) { struct pci_controller *hose; - list_for_each_entry(hose, &hose_list, list_node) { - struct pnv_phb *phb = hose->private_data; - - if (phb && phb->shutdown) - phb->shutdown(phb); - } + list_for_each_entry(hose, &hose_list, list_node) + if (hose->controller_ops.shutdown) + hose->controller_ops.shutdown(hose); } /* Fixup wrong class code in p7ioc and p8 root complex */ @@ -762,22 +837,7 @@ void __init pnv_pci_init(void) pci_devs_phb_init(); /* Configure IOMMU DMA hooks */ - ppc_md.tce_build = pnv_tce_build_vm; - ppc_md.tce_free = pnv_tce_free_vm; - ppc_md.tce_build_rm = pnv_tce_build_rm; - ppc_md.tce_free_rm = pnv_tce_free_rm; - ppc_md.tce_get = pnv_tce_get; set_pci_dma_ops(&dma_iommu_ops); - - /* Configure MSIs */ -#ifdef CONFIG_PCI_MSI - ppc_md.setup_msi_irqs = pnv_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; -#endif } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); - -struct pci_controller_ops pnv_pci_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -}; diff --git a/kernel/arch/powerpc/platforms/powernv/pci.h b/kernel/arch/powerpc/platforms/powernv/pci.h index 070ee888f..36a99feab 100644 --- a/kernel/arch/powerpc/platforms/powernv/pci.h +++ b/kernel/arch/powerpc/platforms/powernv/pci.h @@ -57,8 +57,7 @@ struct pnv_ioda_pe { /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ int tce32_seg; int tce32_segcount; - struct iommu_table *tce32_table; - phys_addr_t tce_inval_reg_phys; + struct iommu_table_group table_group; /* 64-bit TCE bypass region */ bool tce_bypass_enabled; @@ -106,16 +105,12 @@ struct pnv_phb { unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); - int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, - u64 dma_mask); - u64 (*dma_get_required_mask)(struct pnv_phb *phb, - struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); - void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); - void (*reserve_m64_pe)(struct pnv_phb *phb); - int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); + void (*reserve_m64_pe)(struct pci_bus *bus, + unsigned long *pe_bitmap, bool all); + int (*pick_m64_pe)(struct pci_bus *bus, bool all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); @@ -123,6 +118,7 @@ struct pnv_phb { union { struct { struct iommu_table iommu_table; + struct iommu_table_group table_group; } p5ioc2; struct { @@ -186,6 +182,12 @@ struct pnv_phb { * boot for resource allocation purposes */ struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; } ioda; }; @@ -200,6 +202,13 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); @@ -207,6 +216,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); +extern struct iommu_table *pnv_pci_table_alloc(int nid); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); @@ -218,4 +234,9 @@ extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); +extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern void pnv_pci_dma_bus_setup(struct pci_bus *bus); +extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); + #endif /* __POWERNV_PCI_H */ diff --git a/kernel/arch/powerpc/platforms/powernv/powernv.h b/kernel/arch/powerpc/platforms/powernv/powernv.h index 826d2c9be..6dbc0a1da 100644 --- a/kernel/arch/powerpc/platforms/powernv/powernv.h +++ b/kernel/arch/powerpc/platforms/powernv/powernv.h @@ -12,29 +12,18 @@ struct pci_dev; #ifdef CONFIG_PCI extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); -extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask); -extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } - -static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) -{ - return -ENODEV; -} - -static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) -{ - return 0; -} #endif -extern struct pci_controller_ops pnv_pci_controller_ops; - extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); +extern void opal_handle_events(uint64_t events); +extern void opal_event_shutdown(void); + bool cpu_core_split_required(void); #endif /* _POWERNV_H */ diff --git a/kernel/arch/powerpc/platforms/powernv/rng.c b/kernel/arch/powerpc/platforms/powernv/rng.c index 6eb808ff6..5dcbdea1a 100644 --- a/kernel/arch/powerpc/platforms/powernv/rng.c +++ b/kernel/arch/powerpc/platforms/powernv/rng.c @@ -128,7 +128,7 @@ static __init int rng_create(struct device_node *dn) pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_long = powernv_get_random_long; + ppc_md.get_random_seed = powernv_get_random_long; return 0; } diff --git a/kernel/arch/powerpc/platforms/powernv/setup.c b/kernel/arch/powerpc/platforms/powernv/setup.c index 16fdcb23f..a9a8fa37a 100644 --- a/kernel/arch/powerpc/platforms/powernv/setup.c +++ b/kernel/arch/powerpc/platforms/powernv/setup.c @@ -35,12 +35,8 @@ #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> -#include <asm/cputhreads.h> -#include <asm/cpuidle.h> -#include <asm/code-patching.h> #include "powernv.h" -#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -111,7 +107,7 @@ static void pnv_prepare_going_down(void) * Disable all notifiers from OPAL, we can't * service interrupts anymore anyway */ - opal_notifier_disable(); + opal_event_shutdown(); /* Soft disable interrupts */ local_irq_disable(); @@ -169,21 +165,6 @@ static void pnv_progress(char *s, unsigned short hex) { } -static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) -{ - if (dev_is_pci(dev)) - return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); - return __dma_set_mask(dev, dma_mask); -} - -static u64 pnv_dma_get_required_mask(struct device *dev) -{ - if (dev_is_pci(dev)) - return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); - - return __dma_get_required_mask(dev); -} - static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -206,7 +187,7 @@ static void pnv_kexec_wait_secondaries_down(void) for_each_online_cpu(i) { uint8_t status; - int64_t rc; + int64_t rc, timeout = 1000; if (i == my_cpu) continue; @@ -223,6 +204,18 @@ static void pnv_kexec_wait_secondaries_down(void) i, paca[i].hw_cpu_id); notified = i; } + + /* + * On crash secondaries might be unreachable or hung, + * so timeout if we've waited too long + * */ + mdelay(1); + if (timeout-- == 0) { + printk(KERN_ERR "kexec: timed out waiting for " + "cpu %d (physical %d) to enter OPAL\n", + i, paca[i].hw_cpu_id); + break; + } } } } @@ -244,16 +237,16 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) /* Return the CPU to OPAL */ opal_return_cpu(); - } else if (crash_shutdown) { - /* - * On crash, we don't wait for secondaries to go - * down as they might be unreachable or hung, so - * instead we just wait a bit and move on. - */ - mdelay(1); } else { /* Primary waits for the secondaries to have reached OPAL */ pnv_kexec_wait_secondaries_down(); + + /* + * We might be running as little-endian - now that interrupts + * are disabled, reset the HILE bit to big-endian so we don't + * take interrupts in the wrong endian later + */ + opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); } } #endif /* CONFIG_KEXEC */ @@ -277,173 +270,6 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } -static u32 supported_cpuidle_states; - -int pnv_save_sprs_for_winkle(void) -{ - int cpu; - int rc; - - /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross - * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. - */ - uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); - - for_each_possible_cpu(cpu) { - uint64_t pir = get_hard_smp_processor_id(cpu); - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; - - /* - * HSPRG0 is used to store the cpu's pointer to paca. Hence last - * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 - * with 63rd bit set, so that when a thread wakes up at 0x100 we - * can use this bit to distinguish between fastsleep and - * deep winkle. - */ - hsprg0_val |= 1; - - rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); - if (rc != 0) - return rc; - - /* HIDs are per core registers */ - if (cpu_thread_in_core(cpu) == 0) { - - rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); - if (rc != 0) - return rc; - } - } - - return 0; -} - -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - First 8 bits track the idle state of each thread - * of the core. The 8th bit is the lock bit. Initially all thread bits - * are set. They are cleared when the thread enters deep idle state - * like sleep and winkle. Initially the lock bit is cleared. - * The lock bit has 2 purposes - * a. While the first thread is restoring core state, it prevents - * other threads in the core from switching to process context. - * b. While the last thread in the core is saving the core state, it - * prevents a different thread from waking up. - */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca[cpu].core_idle_state_ptr = core_idle_state; - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; - paca[cpu].thread_mask = 1 << j; - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) - pnv_save_sprs_for_winkle(); -} - -u32 pnv_get_supported_cpuidle_states(void) -{ - return supported_cpuidle_states; -} -EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); - -static int __init pnv_init_idle_states(void) -{ - struct device_node *power_mgt; - int dt_idle_states; - u32 *flags; - int i; - - supported_cpuidle_states = 0; - - if (cpuidle_disable != IDLE_NO_OVERRIDE) - goto out; - - if (!firmware_has_feature(FW_FEATURE_OPALv3)) - goto out; - - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { - pr_warn("opal: PowerMgmt Node not found\n"); - goto out; - } - dt_idle_states = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); - goto out; - } - - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free; - } - - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; - - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } - pnv_alloc_idle_core_states(); -out_free: - kfree(flags); -out: - return 0; -} - -subsys_initcall(pnv_init_idle_states); - static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); @@ -492,8 +318,6 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, - .dma_set_mask = pnv_dma_set_mask, - .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/kernel/arch/powerpc/platforms/powernv/smp.c b/kernel/arch/powerpc/platforms/powernv/smp.c index 8f70ba681..ca264833e 100644 --- a/kernel/arch/powerpc/platforms/powernv/smp.c +++ b/kernel/arch/powerpc/platforms/powernv/smp.c @@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void) * so clear LPCR:PECE1. We keep PECE2 enabled. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); + + /* + * Hard-disable interrupts, and then clear irq_happened flags + * that we can safely ignore while off-line, since they + * are for things for which we do no processing when off-line + * (or in the case of HMI, all the processing we need to do + * is done in lower-level real-mode code). + */ + hard_irq_disable(); + local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI); + while (!generic_check_cpu_restart(cpu)) { + /* + * Clear IPI flag, since we don't handle IPIs while + * offline, except for those when changing micro-threading + * mode, which are handled explicitly below, and those + * for coming online, which are handled via + * generic_check_cpu_restart() calls. + */ + kvmppc_set_host_ipi(cpu, 0); ppc64_runlatch_off(); @@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void) * having finished executing in a KVM guest, then srr1 * contains 0. */ - if ((srr1 & wmask) == SRR1_WAKEEE) { + if (((srr1 & wmask) == SRR1_WAKEEE) || + (local_paca->irq_happened & PACA_IRQ_EE)) { icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - smp_mb(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); - kvmppc_set_host_ipi(cpu, 0); } + local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL); + smp_mb(); if (cpu_core_split_required()) continue; - if (!generic_check_cpu_restart(cpu)) + if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline !\n", cpu); } mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); diff --git a/kernel/arch/powerpc/platforms/powernv/subcore.c b/kernel/arch/powerpc/platforms/powernv/subcore.c index f60f80ada..503a73f59 100644 --- a/kernel/arch/powerpc/platforms/powernv/subcore.c +++ b/kernel/arch/powerpc/platforms/powernv/subcore.c @@ -190,7 +190,7 @@ static void unsplit_core(void) hid0 = mfspr(SPRN_HID0); hid0 &= ~HID0_POWER8_DYNLPARDIS; - mtspr(SPRN_HID0, hid0); + update_power8_hid0(hid0); update_hid_in_slw(hid0); while (mfspr(SPRN_HID0) & mask) @@ -227,7 +227,7 @@ static void split_core(int new_mode) /* Write new mode */ hid0 = mfspr(SPRN_HID0); hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; - mtspr(SPRN_HID0, hid0); + update_power8_hid0(hid0); update_hid_in_slw(hid0); /* Wait for it to happen */ diff --git a/kernel/arch/powerpc/platforms/ps3/Kconfig b/kernel/arch/powerpc/platforms/ps3/Kconfig index 56f274064..b27f40f26 100644 --- a/kernel/arch/powerpc/platforms/ps3/Kconfig +++ b/kernel/arch/powerpc/platforms/ps3/Kconfig @@ -1,6 +1,6 @@ config PPC_PS3 bool "Sony PS3" - depends on PPC64 && PPC_BOOK3S + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN select PPC_CELL select USB_OHCI_LITTLE_ENDIAN select USB_OHCI_BIG_ENDIAN_MMIO diff --git a/kernel/arch/powerpc/platforms/ps3/interrupt.c b/kernel/arch/powerpc/platforms/ps3/interrupt.c index a6c42f343..638c40609 100644 --- a/kernel/arch/powerpc/platforms/ps3/interrupt.c +++ b/kernel/arch/powerpc/platforms/ps3/interrupt.c @@ -678,7 +678,8 @@ static int ps3_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static int ps3_host_match(struct irq_domain *h, struct device_node *np) +static int ps3_host_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { /* Match all */ return 1; diff --git a/kernel/arch/powerpc/platforms/ps3/os-area.c b/kernel/arch/powerpc/platforms/ps3/os-area.c index 097871398..3db53e8af 100644 --- a/kernel/arch/powerpc/platforms/ps3/os-area.c +++ b/kernel/arch/powerpc/platforms/ps3/os-area.c @@ -194,11 +194,6 @@ static const struct os_area_db_id os_area_db_id_rtc_diff = { .key = OS_AREA_DB_KEY_RTC_DIFF }; -static const struct os_area_db_id os_area_db_id_video_mode = { - .owner = OS_AREA_DB_OWNER_LINUX, - .key = OS_AREA_DB_KEY_VIDEO_MODE -}; - #define SECONDS_FROM_1970_TO_2000 946684800LL /** diff --git a/kernel/arch/powerpc/platforms/ps3/time.c b/kernel/arch/powerpc/platforms/ps3/time.c index ce73ce865..791c6142c 100644 --- a/kernel/arch/powerpc/platforms/ps3/time.c +++ b/kernel/arch/powerpc/platforms/ps3/time.c @@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void) return PTR_ERR_OR_ZERO(pdev); } - -module_init(ps3_rtc_init); +device_initcall(ps3_rtc_init); diff --git a/kernel/arch/powerpc/platforms/pseries/Kconfig b/kernel/arch/powerpc/platforms/pseries/Kconfig index 54c87d5d3..bec90fb30 100644 --- a/kernel/arch/powerpc/platforms/pseries/Kconfig +++ b/kernel/arch/powerpc/platforms/pseries/Kconfig @@ -4,6 +4,7 @@ config PPC_PSERIES select HAVE_PCSPKR_PLATFORM select MPIC select OF_DYNAMIC + select PCI select PCI_MSI select PPC_XICS select PPC_ICP_NATIVE @@ -15,7 +16,6 @@ config PPC_PSERIES select RTAS_ERROR_LOGGING select PPC_UDBG_16550 select PPC_NATIVE - select PPC_PCI_CHOICE if EXPERT select PPC_DOORBELL select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP @@ -43,11 +43,6 @@ config DTL Say N if you are unsure. -config PSERIES_MSI - bool - depends on PCI_MSI && PPC_PSERIES && EEH - default y - config PSERIES_ENERGY tristate "pSeries energy management capabilities driver" depends on PPC_PSERIES diff --git a/kernel/arch/powerpc/platforms/pseries/Makefile b/kernel/arch/powerpc/platforms/pseries/Makefile index 03480796a..fedc2ccf0 100644 --- a/kernel/arch/powerpc/platforms/pseries/Makefile +++ b/kernel/arch/powerpc/platforms/pseries/Makefile @@ -2,14 +2,13 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ + of_helpers.o \ setup.o iommu.o event_sources.o ras.o \ - firmware.o power.o dlpar.o mobility.o rng.o + firmware.o power.o dlpar.o mobility.o rng.o \ + pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o -obj-$(CONFIG_PCI) += pci.o pci_dlpar.o -obj-$(CONFIG_PSERIES_MSI) += msi.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o diff --git a/kernel/arch/powerpc/platforms/pseries/dlpar.c b/kernel/arch/powerpc/platforms/pseries/dlpar.c index 019d34aaf..f244dcb4f 100644 --- a/kernel/arch/powerpc/platforms/pseries/dlpar.c +++ b/kernel/arch/powerpc/platforms/pseries/dlpar.c @@ -18,6 +18,8 @@ #include <linux/cpu.h> #include <linux/slab.h> #include <linux/of.h> + +#include "of_helpers.h" #include "offline_states.h" #include "pseries.h" @@ -244,36 +246,13 @@ cc_error: return first_dn; } -static struct device_node *derive_parent(const char *path) -{ - struct device_node *parent; - char *last_slash; - - last_slash = strrchr(path, '/'); - if (last_slash == path) { - parent = of_find_node_by_path("/"); - } else { - char *parent_path; - int parent_path_len = last_slash - path + 1; - parent_path = kmalloc(parent_path_len, GFP_KERNEL); - if (!parent_path) - return NULL; - - strlcpy(parent_path, path, parent_path_len); - parent = of_find_node_by_path(parent_path); - kfree(parent_path); - } - - return parent; -} - int dlpar_attach_node(struct device_node *dn) { int rc; - dn->parent = derive_parent(dn->full_name); - if (!dn->parent) - return -ENOMEM; + dn->parent = pseries_of_derive_parent(dn->full_name); + if (IS_ERR(dn->parent)) + return PTR_ERR(dn->parent); rc = of_attach_node(dn); if (rc) { @@ -421,10 +400,11 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) return -ENODEV; dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); - if (!dn) - return -EINVAL; - of_node_put(parent); + if (!dn) { + dlpar_release_drc(drc_index); + return -EINVAL; + } rc = dlpar_attach_node(dn); if (rc) { diff --git a/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c b/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c index 2039397cc..ac3ffd97e 100644 --- a/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/kernel/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -433,42 +433,34 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) return ret; /* Parse the result out */ - result = 0; - if (rets[1]) { - switch(rets[0]) { - case 0: - result &= ~EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - break; - case 1: - result |= EEH_STATE_RESET_ACTIVE; - result |= EEH_STATE_MMIO_ACTIVE; - result |= EEH_STATE_DMA_ACTIVE; - break; - case 2: - result &= ~EEH_STATE_RESET_ACTIVE; - result &= ~EEH_STATE_MMIO_ACTIVE; - result &= ~EEH_STATE_DMA_ACTIVE; - break; - case 4: - result &= ~EEH_STATE_RESET_ACTIVE; - result &= ~EEH_STATE_MMIO_ACTIVE; - result &= ~EEH_STATE_DMA_ACTIVE; - result |= EEH_STATE_MMIO_ENABLED; - break; - case 5: - if (rets[2]) { - if (state) *state = rets[2]; - result = EEH_STATE_UNAVAILABLE; - } else { - result = EEH_STATE_NOT_SUPPORT; - } - break; - default: + if (!rets[1]) + return EEH_STATE_NOT_SUPPORT; + + switch(rets[0]) { + case 0: + result = EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE; + break; + case 1: + result = EEH_STATE_RESET_ACTIVE | + EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE; + break; + case 2: + result = 0; + break; + case 4: + result = EEH_STATE_MMIO_ENABLED; + break; + case 5: + if (rets[2]) { + if (state) *state = rets[2]; + result = EEH_STATE_UNAVAILABLE; + } else { result = EEH_STATE_NOT_SUPPORT; } - } else { + break; + default: result = EEH_STATE_NOT_SUPPORT; } @@ -519,7 +511,7 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) /** * pseries_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. diff --git a/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c b/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c index 0ced387e1..e9ff44cd5 100644 --- a/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/kernel/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -92,13 +92,12 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn) return NULL; new_prop->name = kstrdup(prop->name, GFP_KERNEL); - new_prop->value = kmalloc(prop->length, GFP_KERNEL); + new_prop->value = kmemdup(prop->value, prop->length, GFP_KERNEL); if (!new_prop->name || !new_prop->value) { dlpar_free_drconf_property(new_prop); return NULL; } - memcpy(new_prop->value, prop->value, prop->length); new_prop->length = prop->length; /* Convert the property to cpu endian-ness */ diff --git a/kernel/arch/powerpc/platforms/pseries/hvcserver.c b/kernel/arch/powerpc/platforms/pseries/hvcserver.c index eedb64594..94a6e5612 100644 --- a/kernel/arch/powerpc/platforms/pseries/hvcserver.c +++ b/kernel/arch/powerpc/platforms/pseries/hvcserver.c @@ -142,11 +142,11 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, int more = 1; int retval; - memset(pi_buff, 0x00, PAGE_SIZE); /* invalid parameters */ if (!head || !pi_buff) return -EINVAL; + memset(pi_buff, 0x00, PAGE_SIZE); last_p_partition_ID = last_p_unit_address = ~0UL; INIT_LIST_HEAD(head); diff --git a/kernel/arch/powerpc/platforms/pseries/iommu.c b/kernel/arch/powerpc/platforms/pseries/iommu.c index 61d5a17f4..bd98ce2be 100644 --- a/kernel/arch/powerpc/platforms/pseries/iommu.c +++ b/kernel/arch/powerpc/platforms/pseries/iommu.c @@ -36,6 +36,8 @@ #include <linux/crash_dump.h> #include <linux/memory.h> #include <linux/of.h> +#include <linux/iommu.h> +#include <linux/rculist.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -51,6 +53,73 @@ #include "pseries.h" +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl = NULL; + struct iommu_table_group_link *tgl = NULL; + + table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, + node); + if (!table_group) + goto fail_exit; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); + if (!tbl) + goto fail_exit; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + goto fail_exit; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[0] = tbl; + + return table_group; + +fail_exit: + kfree(tgl); + kfree(table_group); + kfree(tbl); + + return NULL; +} + +static void iommu_pseries_free_group(struct iommu_table_group *table_group, + const char *node_name) +{ + struct iommu_table *tbl; +#ifdef CONFIG_IOMMU_API + struct iommu_table_group_link *tgl; +#endif + + if (!table_group) + return; + + tbl = table_group->tables[0]; +#ifdef CONFIG_IOMMU_API + tgl = list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + + WARN_ON_ONCE(!tgl); + if (tgl) { + list_del_rcu(&tgl->next); + kfree(tgl); + } + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); + } +#endif + iommu_free_table(tbl, node_name); + + kfree(table_group); +} + static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, __be64 *startp, __be64 *endp) { @@ -193,7 +262,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; unsigned long flags; - if (npages == 1) { + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } @@ -285,6 +354,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n { u64 rc; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) + return tce_free_pSeriesLP(tbl, tcenum, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { @@ -460,8 +532,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } - -#ifdef CONFIG_PCI static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl) @@ -546,6 +616,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_size = size >> tbl->it_page_shift; } +struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, + .get = tce_get_pseries +}; + static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) { struct device_node *dn; @@ -610,12 +686,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pci->phb->dma_window_size = 0x8000000ul; pci->phb->dma_window_base_cur = 0x8000000ul; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); + tbl->it_ops = &iommu_table_pseries_ops; + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -625,6 +702,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); } +struct iommu_table_ops iommu_table_lpar_multi_ops = { + .set = tce_buildmulti_pSeriesLP, + .clear = tce_freemulti_pSeriesLP, + .get = tce_get_pSeriesLP +}; static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { @@ -653,15 +735,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci = PCI_DN(pdn); pr_debug(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pdn->full_name, ppci->table_group); - if (!ppci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - ppci->phb->node); + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); - ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); - pr_debug(" created table: %p\n", ppci->iommu_table); + tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); } } @@ -683,13 +767,15 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) struct pci_controller *phb = PCI_DN(dn)->phb; pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->node); + PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); + tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); - PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); + tbl->it_ops = &iommu_table_pseries_ops; + iommu_init_table(tbl, phb->node); + iommu_register_group(PCI_DN(dn)->table_group, + pci_domain_nr(phb->bus), 0); + set_iommu_table_base(&dev->dev, tbl); + iommu_add_device(&dev->dev); return; } @@ -697,13 +783,14 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) * an already allocated iommu table is found and use that. */ - while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) + while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) dn = dn->parent; - if (dn && PCI_DN(dn)) - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); - else + if (dn && PCI_DN(dn)) { + set_iommu_table_base(&dev->dev, + PCI_DN(dn)->table_group->tables[0]); + iommu_add_device(&dev->dev); + } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); } @@ -1088,7 +1175,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %s\n", dn->full_name); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1104,18 +1191,21 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" parent is %s\n", pdn->full_name); pci = PCI_DN(pdn); - if (!pci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); - pr_debug(" created table: %p\n", pci->iommu_table); + tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, + pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->table_group); } else { - pr_debug(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->table_group); } - set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); + iommu_add_device(&dev->dev); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) @@ -1145,7 +1235,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1162,11 +1252,10 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) } } - /* fall back on iommu ops, restore table pointer with ops */ + /* fall back on iommu ops */ if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) { dev_info(dev, "Restoring 32-bit DMA via iommu\n"); set_dma_ops(dev, &dma_iommu_ops); - pci_dma_dev_setup_pSeriesLP(pdev); } check_mask: @@ -1189,7 +1278,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) dn = pci_device_to_OF_node(pdev); /* search upwards for ibm,dma-window */ - for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table; + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group; dn = dn->parent) if (of_get_property(dn, "ibm,dma-window", NULL)) break; @@ -1202,15 +1291,6 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) return dma_iommu_ops.get_required_mask(dev); } -#else /* CONFIG_PCI */ -#define pci_dma_bus_setup_pSeries NULL -#define pci_dma_dev_setup_pSeries NULL -#define pci_dma_bus_setup_pSeriesLP NULL -#define pci_dma_dev_setup_pSeriesLP NULL -#define dma_set_mask_pSeriesLP NULL -#define dma_get_required_mask_pSeriesLP NULL -#endif /* !CONFIG_PCI */ - static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1269,8 +1349,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * the device node. */ remove_ddw(np, false); - if (pci && pci->iommu_table) - iommu_free_table(pci->iommu_table, np->full_name); + if (pci && pci->table_group) + iommu_pseries_free_group(pci->table_group, + np->full_name); spin_lock(&direct_window_list_lock); list_for_each_entry(window, &direct_window_list, list) { @@ -1300,22 +1381,11 @@ void iommu_init_early_pSeries(void) return; if (firmware_has_feature(FW_FEATURE_LPAR)) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { - ppc_md.tce_build = tce_buildmulti_pSeriesLP; - ppc_md.tce_free = tce_freemulti_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; - } - ppc_md.tce_get = tce_get_pSeriesLP; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; } else { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; - ppc_md.tce_get = tce_get_pseries; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; } @@ -1333,8 +1403,6 @@ static int __init disable_multitce(char *str) firmware_has_feature(FW_FEATURE_LPAR) && firmware_has_feature(FW_FEATURE_MULTITCE)) { printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; } return 1; diff --git a/kernel/arch/powerpc/platforms/pseries/msi.c b/kernel/arch/powerpc/platforms/pseries/msi.c index c8d24f9a6..272e9ec1a 100644 --- a/kernel/arch/powerpc/platforms/pseries/msi.c +++ b/kernel/arch/powerpc/platforms/pseries/msi.c @@ -18,6 +18,8 @@ #include <asm/ppc-pci.h> #include <asm/machdep.h> +#include "pseries.h" + static int query_token, change_token; #define RTAS_QUERY_FN 0 @@ -116,7 +118,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; - list_for_each_entry(entry, &pdev->msi_list, list) { + for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; @@ -348,7 +350,7 @@ static int check_msix_entries(struct pci_dev *pdev) * So we must reject such requests. */ expected = 0; - list_for_each_entry(entry, &pdev->msi_list, list) { + for_each_pci_msi_entry(entry, pdev) { if (entry->msi_attrib.entry_nr != expected) { pr_debug("rtas_msi: bad MSI-X entries.\n"); return -EINVAL; @@ -460,7 +462,7 @@ again: } i = 0; - list_for_each_entry(entry, &pdev->msi_list, list) { + for_each_pci_msi_entry(entry, pdev) { hwirq = rtas_query_irq_number(pdn, i++); if (hwirq < 0) { pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); @@ -505,6 +507,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { + struct pci_controller *phb; + query_token = rtas_token("ibm,query-interrupt-source-number"); change_token = rtas_token("ibm,change-msi"); @@ -516,9 +520,15 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = rtas_setup_msi_irqs; - ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs; + WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); + pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + } WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; diff --git a/kernel/arch/powerpc/platforms/pseries/of_helpers.c b/kernel/arch/powerpc/platforms/pseries/of_helpers.c new file mode 100644 index 000000000..2798933c0 --- /dev/null +++ b/kernel/arch/powerpc/platforms/pseries/of_helpers.c @@ -0,0 +1,38 @@ +#include <linux/string.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/of.h> + +#include "of_helpers.h" + +/** + * pseries_of_derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +struct device_node *pseries_of_derive_parent(const char *path) +{ + struct device_node *parent; + char *parent_path = "/"; + const char *tail; + + /* We do not want the trailing '/' character */ + tail = kbasename(path) - 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (tail > path) { + parent_path = kstrndup(path, tail - path, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + } + parent = of_find_node_by_path(parent_path); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent ? parent : ERR_PTR(-EINVAL); +} diff --git a/kernel/arch/powerpc/platforms/pseries/of_helpers.h b/kernel/arch/powerpc/platforms/pseries/of_helpers.h new file mode 100644 index 000000000..bb83d39ae --- /dev/null +++ b/kernel/arch/powerpc/platforms/pseries/of_helpers.h @@ -0,0 +1,8 @@ +#ifndef _PSERIES_OF_HELPERS_H +#define _PSERIES_OF_HELPERS_H + +#include <linux/of.h> + +struct device_node *pseries_of_derive_parent(const char *path); + +#endif /* _PSERIES_OF_HELPERS_H */ diff --git a/kernel/arch/powerpc/platforms/pseries/reconfig.c b/kernel/arch/powerpc/platforms/pseries/reconfig.c index 0f319521e..7c7fcc042 100644 --- a/kernel/arch/powerpc/platforms/pseries/reconfig.c +++ b/kernel/arch/powerpc/platforms/pseries/reconfig.c @@ -22,37 +22,7 @@ #include <asm/uaccess.h> #include <asm/mmu.h> -/** - * derive_parent - basically like dirname(1) - * @path: the full_name of a node to be added to the tree - * - * Returns the node which should be the parent of the node - * described by path. E.g., for path = "/foo/bar", returns - * the node with full_name = "/foo". - */ -static struct device_node *derive_parent(const char *path) -{ - struct device_node *parent = NULL; - char *parent_path = "/"; - size_t parent_path_len = strrchr(path, '/') - path + 1; - - /* reject if path is "/" */ - if (!strcmp(path, "/")) - return ERR_PTR(-EINVAL); - - if (strrchr(path, '/') != path) { - parent_path = kmalloc(parent_path_len, GFP_KERNEL); - if (!parent_path) - return ERR_PTR(-ENOMEM); - strlcpy(parent_path, path, parent_path_len); - } - parent = of_find_node_by_path(parent_path); - if (!parent) - return ERR_PTR(-EINVAL); - if (strcmp(parent_path, "/")) - kfree(parent_path); - return parent; -} +#include "of_helpers.h" static int pSeries_reconfig_add_node(const char *path, struct property *proplist) { @@ -71,7 +41,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist of_node_set_flag(np, OF_DYNAMIC); of_node_init(np); - np->parent = derive_parent(path); + np->parent = pseries_of_derive_parent(path); if (IS_ERR(np->parent)) { err = PTR_ERR(np->parent); goto out_err; diff --git a/kernel/arch/powerpc/platforms/pseries/rng.c b/kernel/arch/powerpc/platforms/pseries/rng.c index e09608770..31ca557af 100644 --- a/kernel/arch/powerpc/platforms/pseries/rng.c +++ b/kernel/arch/powerpc/platforms/pseries/rng.c @@ -38,7 +38,7 @@ static __init int rng_init(void) pr_info("Registering arch random hook.\n"); - ppc_md.get_random_long = pseries_get_random_long; + ppc_md.get_random_seed = pseries_get_random_long; return 0; } diff --git a/kernel/arch/powerpc/platforms/pseries/setup.c b/kernel/arch/powerpc/platforms/pseries/setup.c index e6e8b241d..36df46eab 100644 --- a/kernel/arch/powerpc/platforms/pseries/setup.c +++ b/kernel/arch/powerpc/platforms/pseries/setup.c @@ -40,6 +40,7 @@ #include <linux/seq_file.h> #include <linux/root_dev.h> #include <linux/of.h> +#include <linux/of_pci.h> #include <linux/kexec.h> #include <asm/mmu.h> @@ -111,7 +112,7 @@ static void __init fwnmi_init(void) fwnmi_active = 1; } -static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void pseries_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); @@ -254,24 +255,26 @@ static void __init pseries_discover_pic(void) static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct of_reconfig_data *rd = data; - struct device_node *np = rd->dn; - struct pci_dn *pci = NULL; + struct device_node *parent, *np = rd->dn; + struct pci_dn *pdn; int err = NOTIFY_OK; switch (action) { case OF_RECONFIG_ATTACH_NODE: - pci = np->parent->data; - if (pci) { - update_dn_pci_info(np, pci->phb); - - /* Create EEH device for the OF node */ - eeh_dev_init(PCI_DN(np), pci->phb); + parent = of_get_parent(np); + pdn = parent ? PCI_DN(parent) : NULL; + if (pdn) { + /* Create pdn and EEH device */ + update_dn_pci_info(np, pdn->phb); + eeh_dev_init(PCI_DN(np), pdn->phb); } + + of_node_put(parent); break; case OF_RECONFIG_DETACH_NODE: - pci = PCI_DN(np); - if (pci) - list_del(&pci->list); + pdn = PCI_DN(np); + if (pdn) + list_del(&pdn->list); break; default: err = NOTIFY_DONE; @@ -493,18 +496,7 @@ static void __init find_and_init_phbs(void) * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties * in chosen. */ - if (of_chosen) { - const int *prop; - - prop = of_get_property(of_chosen, - "linux,pci-probe-only", NULL); - if (prop) { - if (*prop) - pci_add_flags(PCI_PROBE_ONLY); - else - pci_clear_flags(PCI_PROBE_ONLY); - } - } + of_pci_check_probe_only(); } static void __init pSeries_setup_arch(void) @@ -835,10 +827,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -#ifndef CONFIG_PCI -void pSeries_final_fixup(void) { } -#endif - struct pci_controller_ops pseries_pci_controller_ops = { .probe_mode = pSeries_pci_probe_mode, }; |