diff options
Diffstat (limited to 'kernel/arch/powerpc/platforms/cell')
52 files changed, 19587 insertions, 0 deletions
diff --git a/kernel/arch/powerpc/platforms/cell/Kconfig b/kernel/arch/powerpc/platforms/cell/Kconfig new file mode 100644 index 000000000..2f23133ab --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/Kconfig @@ -0,0 +1,129 @@ +config PPC_CELL + bool + default n + +config PPC_CELL_COMMON + bool + select PPC_CELL + select PPC_DCR_MMIO + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO + select PPC_NATIVE + select PPC_RTAS + select IRQ_EDGE_EOI_HANDLER + +config PPC_CELL_NATIVE + bool + select PPC_CELL_COMMON + select MPIC + select PPC_IO_WORKAROUNDS + select IBM_EMAC_EMAC4 + select IBM_EMAC_RGMII + select IBM_EMAC_ZMII #test only + select IBM_EMAC_TAH #test only + default n + +config PPC_IBM_CELL_BLADE + bool "IBM Cell Blade" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL_NATIVE + select PPC_OF_PLATFORM_PCI + select PCI + select MMIO_NVRAM + select PPC_UDBG_16550 + select UDBG_RTAS_CONSOLE + +config PPC_CELL_QPACE + bool "IBM Cell - QPACE" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL_COMMON + +config AXON_MSI + bool + depends on PPC_IBM_CELL_BLADE && PCI_MSI + default y + +menu "Cell Broadband Engine options" + depends on PPC_CELL + +config SPU_FS + tristate "SPU file system" + default m + depends on PPC_CELL + select SPU_BASE + select MEMORY_HOTPLUG + help + The SPU file system is used to access Synergistic Processing + Units on machines implementing the Broadband Processor + Architecture. + +config SPU_FS_64K_LS + bool "Use 64K pages to map SPE local store" + # we depend on PPC_MM_SLICES for now rather than selecting + # it because we depend on hugetlbfs hooks being present. We + # will fix that when the generic code has been improved to + # not require hijacking hugetlbfs hooks. + depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES + default y + select PPC_HAS_HASH_64K + help + This option causes SPE local stores to be mapped in process + address spaces using 64K pages while the rest of the kernel + uses 4K pages. This can improve performances of applications + using multiple SPEs by lowering the TLB pressure on them. + +config SPU_BASE + bool + default n + select PPC_COPRO_BASE + +config CBE_RAS + bool "RAS features for bare metal Cell BE" + depends on PPC_CELL_NATIVE + default y + +config PPC_IBM_CELL_RESETBUTTON + bool "IBM Cell Blade Pinhole reset button" + depends on CBE_RAS && PPC_IBM_CELL_BLADE + default y + help + Support Pinhole Resetbutton on IBM Cell blades. + This adds a method to trigger system reset via front panel pinhole button. + +config PPC_IBM_CELL_POWERBUTTON + tristate "IBM Cell Blade power button" + depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV + default y + help + Support Powerbutton on IBM Cell blades. + This will enable the powerbutton as an input device. + +config CBE_THERM + tristate "CBE thermal support" + default m + depends on CBE_RAS && SPU_BASE + +config PPC_PMI + tristate + default y + depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON + help + PMI (Platform Management Interrupt) is a way to + communicate with the BMC (Baseboard Management Controller). + It is used in some IBM Cell blades. + +config CBE_CPUFREQ_SPU_GOVERNOR + tristate "CBE frequency scaling based on SPU usage" + depends on SPU_FS && CPU_FREQ + default m + help + This governor checks for spu usage to adjust the cpu frequency. + If no spu is running on a given cpu, that cpu will be throttled to + the minimal possible frequency. + +endmenu + +config OPROFILE_CELL + def_bool y + depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE + diff --git a/kernel/arch/powerpc/platforms/cell/Makefile b/kernel/arch/powerpc/platforms/cell/Makefile new file mode 100644 index 000000000..34699bddf --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/Makefile @@ -0,0 +1,31 @@ +obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o + +obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ + pmu.o spider-pci.o +obj-$(CONFIG_CBE_RAS) += ras.o + +obj-$(CONFIG_CBE_THERM) += cbe_thermal.o +obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o + +obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o + +ifeq ($(CONFIG_SMP),y) +obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o +obj-$(CONFIG_PPC_CELL_QPACE) += smp.o +endif + +# needed only when building loadable spufs.ko +spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o +spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o + +obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ + spu_notify.o \ + spu_syscalls.o \ + $(spu-priv1-y) \ + $(spu-manage-y) \ + spufs/ + +obj-$(CONFIG_AXON_MSI) += axon_msi.o + +# qpace setup +obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o diff --git a/kernel/arch/powerpc/platforms/cell/axon_msi.c b/kernel/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 000000000..623bd9614 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/axon_msi.c @@ -0,0 +1,492 @@ +/* + * Copyright 2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/export.h> +#include <linux/of_platform.h> +#include <linux/debugfs.h> +#include <linux/slab.h> + +#include <asm/dcr.h> +#include <asm/machdep.h> +#include <asm/prom.h> + + +/* + * MSIC registers, specified as offsets from dcr_base + */ +#define MSIC_CTRL_REG 0x0 + +/* Base Address registers specify FIFO location in BE memory */ +#define MSIC_BASE_ADDR_HI_REG 0x3 +#define MSIC_BASE_ADDR_LO_REG 0x4 + +/* Hold the read/write offsets into the FIFO */ +#define MSIC_READ_OFFSET_REG 0x5 +#define MSIC_WRITE_OFFSET_REG 0x6 + + +/* MSIC control register flags */ +#define MSIC_CTRL_ENABLE 0x0001 +#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 +#define MSIC_CTRL_IRQ_ENABLE 0x0008 +#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 + +/* + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. + * Currently we're using a 64KB FIFO size. + */ +#define MSIC_FIFO_SIZE_SHIFT 16 +#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) + +/* + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits + * 8-9 of the MSIC control reg. + */ +#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) + +/* + * We need to mask the read/write offsets to make sure they stay within + * the bounds of the FIFO. Also they should always be 16-byte aligned. + */ +#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) + +/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ +#define MSIC_FIFO_ENTRY_SIZE 0x10 + + +struct axon_msic { + struct irq_domain *irq_domain; + __le32 *fifo_virt; + dma_addr_t fifo_phys; + dcr_host_t dcr_host; + u32 read_offset; +#ifdef DEBUG + u32 __iomem *trigger; +#endif +}; + +#ifdef DEBUG +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic); +#else +static inline void axon_msi_debug_setup(struct device_node *dn, + struct axon_msic *msic) { } +#endif + + +static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) +{ + pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); + + dcr_write(msic->dcr_host, dcr_n, val); +} + +static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct axon_msic *msic = irq_get_handler_data(irq); + u32 write_offset, msi; + int idx; + int retry = 0; + + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + pr_devel("axon_msi: original write_offset 0x%x\n", write_offset); + + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + + while (msic->read_offset != write_offset && retry < 100) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo_virt[idx]); + msi &= 0xFFFF; + + pr_devel("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + + if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + generic_handle_irq(msi); + msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); + } else { + /* + * Reading the MSIC_WRITE_OFFSET_REG does not + * reliably flush the outstanding DMA to the + * FIFO buffer. Here we were reading stale + * data, so we need to retry. + */ + udelay(1); + retry++; + pr_devel("axon_msi: invalid irq 0x%x!\n", msi); + continue; + } + + if (retry) { + pr_devel("axon_msi: late irq 0x%x, retry %d\n", + msi, retry); + retry = 0; + } + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + if (retry) { + printk(KERN_WARNING "axon_msi: irq timed out\n"); + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + chip->irq_eoi(&desc->irq_data); +} + +static struct axon_msic *find_msi_translator(struct pci_dev *dev) +{ + struct irq_domain *irq_domain; + struct device_node *dn, *tmp; + const phandle *ph; + struct axon_msic *msic = NULL; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return NULL; + } + + for (; dn; dn = of_get_next_parent(dn)) { + ph = of_get_property(dn, "msi-translator", NULL); + if (ph) + break; + } + + if (!ph) { + dev_dbg(&dev->dev, + "axon_msi: no msi-translator property found\n"); + goto out_error; + } + + tmp = dn; + dn = of_find_node_by_phandle(*ph); + of_node_put(tmp); + if (!dn) { + dev_dbg(&dev->dev, + "axon_msi: msi-translator doesn't point to a node\n"); + goto out_error; + } + + irq_domain = irq_find_host(dn); + if (!irq_domain) { + dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n", + dn->full_name); + goto out_error; + } + + msic = irq_domain->host_data; + +out_error: + of_node_put(dn); + + return msic; +} + +static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) +{ + struct device_node *dn; + struct msi_desc *entry; + int len; + const u32 *prop; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return -ENODEV; + } + + entry = list_first_entry(&dev->msi_list, struct msi_desc, list); + + for (; dn; dn = of_get_next_parent(dn)) { + if (entry->msi_attrib.is_64) { + prop = of_get_property(dn, "msi-address-64", &len); + if (prop) + break; + } + + prop = of_get_property(dn, "msi-address-32", &len); + if (prop) + break; + } + + if (!prop) { + dev_dbg(&dev->dev, + "axon_msi: no msi-address-(32|64) properties found\n"); + return -ENOENT; + } + + switch (len) { + case 8: + msg->address_hi = prop[0]; + msg->address_lo = prop[1]; + break; + case 4: + msg->address_hi = 0; + msg->address_lo = prop[0]; + break; + default: + dev_dbg(&dev->dev, + "axon_msi: malformed msi-address-(32|64) property\n"); + of_node_put(dn); + return -EINVAL; + } + + of_node_put(dn); + + return 0; +} + +static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int virq, rc; + struct msi_desc *entry; + struct msi_msg msg; + struct axon_msic *msic; + + msic = find_msi_translator(dev); + if (!msic) + return -ENODEV; + + rc = setup_msi_msg_address(dev, &msg); + if (rc) + return rc; + + list_for_each_entry(entry, &dev->msi_list, list) { + virq = irq_create_direct_mapping(msic->irq_domain); + if (virq == NO_IRQ) { + dev_warn(&dev->dev, + "axon_msi: virq allocation failed!\n"); + return -1; + } + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); + + irq_set_msi_desc(virq, entry); + msg.data = virq; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); + + list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } +} + +static struct irq_chip msic_irq_chip = { + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_shutdown = pci_msi_mask_irq, + .name = "AXON-MSI", +}; + +static int msic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops msic_host_ops = { + .map = msic_host_map, +}; + +static void axon_msi_shutdown(struct platform_device *device) +{ + struct axon_msic *msic = dev_get_drvdata(&device->dev); + u32 tmp; + + pr_devel("axon_msi: disabling %s\n", + msic->irq_domain->of_node->full_name); + tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); +} + +static int axon_msi_probe(struct platform_device *device) +{ + struct device_node *dn = device->dev.of_node; + struct axon_msic *msic; + unsigned int virq; + int dcr_base, dcr_len; + + pr_devel("axon_msi: setting up dn %s\n", dn->full_name); + + msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + if (!msic) { + printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", + dn->full_name); + goto out; + } + + dcr_base = dcr_resource_start(dn, 0); + dcr_len = dcr_resource_len(dn, 0); + + if (dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR + "axon_msi: couldn't parse dcr properties on %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->dcr_host = dcr_map(dn, dcr_base, dcr_len); + if (!DCR_MAP_OK(msic->dcr_host)) { + printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, + &msic->fifo_phys, GFP_KERNEL); + if (!msic->fifo_virt) { + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", + dn->full_name); + goto out_free_msic; + } + + virq = irq_of_parse_and_map(dn, 0); + if (virq == NO_IRQ) { + printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", + dn->full_name); + goto out_free_fifo; + } + memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); + + /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */ + msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic); + if (!msic->irq_domain) { + printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n", + dn->full_name); + goto out_free_fifo; + } + + irq_set_handler_data(virq, msic); + irq_set_chained_handler(virq, axon_msi_cascade); + pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq); + + /* Enable the MSIC hardware */ + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32); + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, + msic->fifo_phys & 0xFFFFFFFF); + msic_dcr_write(msic, MSIC_CTRL_REG, + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + + msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) + & MSIC_FIFO_SIZE_MASK; + + dev_set_drvdata(&device->dev, msic); + + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + + axon_msi_debug_setup(dn, msic); + + printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); + + return 0; + +out_free_fifo: + dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt, + msic->fifo_phys); +out_free_msic: + kfree(msic); +out: + + return -1; +} + +static const struct of_device_id axon_msi_device_id[] = { + { + .compatible = "ibm,axon-msic" + }, + {} +}; + +static struct platform_driver axon_msi_driver = { + .probe = axon_msi_probe, + .shutdown = axon_msi_shutdown, + .driver = { + .name = "axon-msi", + .of_match_table = axon_msi_device_id, + }, +}; + +static int __init axon_msi_init(void) +{ + return platform_driver_register(&axon_msi_driver); +} +subsys_initcall(axon_msi_init); + + +#ifdef DEBUG +static int msic_set(void *data, u64 val) +{ + struct axon_msic *msic = data; + out_le32(msic->trigger, val); + return 0; +} + +static int msic_get(void *data, u64 *val) +{ + *val = 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n"); + +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) +{ + char name[8]; + u64 addr; + + addr = of_translate_address(dn, of_get_property(dn, "reg", NULL)); + if (addr == OF_BAD_ADDR) { + pr_devel("axon_msi: couldn't translate reg property\n"); + return; + } + + msic->trigger = ioremap(addr, 0x4); + if (!msic->trigger) { + pr_devel("axon_msi: ioremap failed\n"); + return; + } + + snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); + + if (!debugfs_create_file(name, 0600, powerpc_debugfs_root, + msic, &fops_msic)) { + pr_devel("axon_msi: debugfs_create_file failed!\n"); + return; + } +} +#endif /* DEBUG */ diff --git a/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c b/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c new file mode 100644 index 000000000..2bb803130 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c @@ -0,0 +1,118 @@ +/* + * driver for powerbutton on IBM cell blades + * + * (C) Copyright IBM Corp. 2005-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/input.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <asm/pmi.h> +#include <asm/prom.h> + +static struct input_dev *button_dev; +static struct platform_device *button_pdev; + +static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg) +{ + BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON); + + input_report_key(button_dev, KEY_POWER, 1); + input_sync(button_dev); + input_report_key(button_dev, KEY_POWER, 0); + input_sync(button_dev); +} + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_POWER_BUTTON, + .handle_pmi_message = cbe_powerbutton_handle_pmi, +}; + +static int __init cbe_powerbutton_init(void) +{ + int ret = 0; + struct input_dev *dev; + + if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) { + printk(KERN_ERR "%s: Not a cell blade.\n", __func__); + ret = -ENODEV; + goto out; + } + + dev = input_allocate_device(); + if (!dev) { + ret = -ENOMEM; + printk(KERN_ERR "%s: Not enough memory.\n", __func__); + goto out; + } + + set_bit(EV_KEY, dev->evbit); + set_bit(KEY_POWER, dev->keybit); + + dev->name = "Power Button"; + dev->id.bustype = BUS_HOST; + + /* this makes the button look like an acpi power button + * no clue whether anyone relies on that though */ + dev->id.product = 0x02; + dev->phys = "LNXPWRBN/button/input0"; + + button_pdev = platform_device_register_simple("power_button", 0, NULL, 0); + if (IS_ERR(button_pdev)) { + ret = PTR_ERR(button_pdev); + goto out_free_input; + } + + dev->dev.parent = &button_pdev->dev; + ret = input_register_device(dev); + if (ret) { + printk(KERN_ERR "%s: Failed to register device\n", __func__); + goto out_free_pdev; + } + + button_dev = dev; + + ret = pmi_register_handler(&cbe_pmi_handler); + if (ret) { + printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__); + goto out_free_pdev; + } + + goto out; + +out_free_pdev: + platform_device_unregister(button_pdev); +out_free_input: + input_free_device(dev); +out: + return ret; +} + +static void __exit cbe_powerbutton_exit(void) +{ + pmi_unregister_handler(&cbe_pmi_handler); + platform_device_unregister(button_pdev); + input_free_device(button_dev); +} + +module_init(cbe_powerbutton_init); +module_exit(cbe_powerbutton_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/kernel/arch/powerpc/platforms/cell/cbe_regs.c b/kernel/arch/powerpc/platforms/cell/cbe_regs.c new file mode 100644 index 000000000..1428d583c --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/cbe_regs.c @@ -0,0 +1,281 @@ +/* + * cbe_regs.c + * + * Accessor routines for the various MMIO register blocks of the CBE + * + * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. + */ + +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/export.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/cell-regs.h> + +/* + * Current implementation uses "cpu" nodes. We build our own mapping + * array of cpu numbers to cpu nodes locally for now to allow interrupt + * time code to have a fast path rather than call of_get_cpu_node(). If + * we implement cpu hotplug, we'll have to install an appropriate norifier + * in order to release references to the cpu going away + */ +static struct cbe_regs_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_iic_regs __iomem *iic_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + struct cbe_pmd_shadow_regs pmd_shadow_regs; +} cbe_regs_maps[MAX_CBE]; +static int cbe_regs_map_count; + +static struct cbe_thread_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_regs_map *regs; + unsigned int thread_id; + unsigned int cbe_id; +} cbe_thread_map[NR_CPUS]; + +static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} }; +static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE }; + +static struct cbe_regs_map *cbe_find_map(struct device_node *np) +{ + int i; + struct device_node *tmp_np; + + if (strcasecmp(np->type, "spe")) { + for (i = 0; i < cbe_regs_map_count; i++) + if (cbe_regs_maps[i].cpu_node == np || + cbe_regs_maps[i].be_node == np) + return &cbe_regs_maps[i]; + return NULL; + } + + if (np->data) + return np->data; + + /* walk up path until cpu or be node was found */ + tmp_np = np; + do { + tmp_np = tmp_np->parent; + /* on a correct devicetree we wont get up to root */ + BUG_ON(!tmp_np); + } while (strcasecmp(tmp_np->type, "cpu") && + strcasecmp(tmp_np->type, "be")); + + np->data = cbe_find_map(tmp_np); + + return np->data; +} + +struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_pmd_regs); + +struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs); + +struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); + +u32 cbe_get_hw_thread_id(int cpu) +{ + return cbe_thread_map[cpu].thread_id; +} +EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id); + +u32 cbe_cpu_to_node(int cpu) +{ + return cbe_thread_map[cpu].cbe_id; +} +EXPORT_SYMBOL_GPL(cbe_cpu_to_node); + +u32 cbe_node_to_cpu(int node) +{ + return cpumask_first(&cbe_local_mask[node]); + +} +EXPORT_SYMBOL_GPL(cbe_node_to_cpu); + +static struct device_node *cbe_get_be_node(int cpu_id) +{ + struct device_node *np; + + for_each_node_by_type (np, "be") { + int len,i; + const phandle *cpu_handle; + + cpu_handle = of_get_property(np, "cpus", &len); + + /* + * the CAB SLOF tree is non compliant, so we just assume + * there is only one node + */ + if (WARN_ON_ONCE(!cpu_handle)) + return np; + + for (i=0; i<len; i++) + if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) + return np; + } + + return NULL; +} + +void __init cbe_fill_regs_map(struct cbe_regs_map *map) +{ + if(map->be_node) { + struct device_node *be, *np; + + be = map->be_node; + + for_each_node_by_type(np, "pervasive") + if (of_get_parent(np) == be) + map->pmd_regs = of_iomap(np, 0); + + for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") + if (of_get_parent(np) == be) + map->iic_regs = of_iomap(np, 2); + + for_each_node_by_type(np, "mic-tm") + if (of_get_parent(np) == be) + map->mic_tm_regs = of_iomap(np, 0); + } else { + struct device_node *cpu; + /* That hack must die die die ! */ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + cpu = map->cpu_node; + + prop = of_get_property(cpu, "pervasive", NULL); + if (prop != NULL) + map->pmd_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "iic", NULL); + if (prop != NULL) + map->iic_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "mic-tm", NULL); + if (prop != NULL) + map->mic_tm_regs = ioremap(prop->address, prop->len); + } +} + + +void __init cbe_regs_init(void) +{ + int i; + unsigned int thread_id; + struct device_node *cpu; + + /* Build local fast map of CPUs */ + for_each_possible_cpu(i) { + cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id); + cbe_thread_map[i].be_node = cbe_get_be_node(i); + cbe_thread_map[i].thread_id = thread_id; + } + + /* Find maps for each device tree CPU */ + for_each_node_by_type(cpu, "cpu") { + struct cbe_regs_map *map; + unsigned int cbe_id; + + cbe_id = cbe_regs_map_count++; + map = &cbe_regs_maps[cbe_id]; + + if (cbe_regs_map_count > MAX_CBE) { + printk(KERN_ERR "cbe_regs: More BE chips than supported" + "!\n"); + cbe_regs_map_count--; + of_node_put(cpu); + return; + } + map->cpu_node = cpu; + + for_each_possible_cpu(i) { + struct cbe_thread_map *thread = &cbe_thread_map[i]; + + if (thread->cpu_node == cpu) { + thread->regs = map; + thread->cbe_id = cbe_id; + map->be_node = thread->be_node; + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); + if(thread->thread_id == 0) + cpumask_set_cpu(i, &cbe_first_online_cpu); + } + } + + cbe_fill_regs_map(map); + } +} + diff --git a/kernel/arch/powerpc/platforms/cell/cbe_thermal.c b/kernel/arch/powerpc/platforms/cell/cbe_thermal.c new file mode 100644 index 000000000..2c15ff094 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/cbe_thermal.c @@ -0,0 +1,399 @@ +/* + * thermal support for the cell processor + * + * This module adds some sysfs attributes to cpu and spu nodes. + * Base for measurements are the digital thermal sensors (DTS) + * located on the chip. + * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius + * The attributes can be found under + * /sys/devices/system/cpu/cpuX/thermal + * /sys/devices/system/spu/spuX/thermal + * + * The following attributes are added for each node: + * temperature: + * contains the current temperature measured by the DTS + * throttle_begin: + * throttling begins when temperature is greater or equal to + * throttle_begin. Setting this value to 125 prevents throttling. + * throttle_end: + * throttling is being ceased, if the temperature is lower than + * throttle_end. Due to a delay between applying throttling and + * a reduced temperature this value should be less than throttle_begin. + * A value equal to throttle_begin provides only a very little hysteresis. + * throttle_full_stop: + * If the temperatrue is greater or equal to throttle_full_stop, + * full throttling is applied to the cpu or spu. This value should be + * greater than throttle_begin and throttle_end. Setting this value to + * 65 prevents the unit from running code at all. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <asm/spu.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/cell-regs.h> + +#include "spu_priv1_mmio.h" + +#define TEMP_MIN 65 +#define TEMP_MAX 125 + +#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \ +struct device_attribute attr_ ## _prefix ## _ ## _name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _prefix ## _show_ ## _name, \ + .store = _prefix ## _store_ ## _name, \ +}; + +static inline u8 reg_to_temp(u8 reg_value) +{ + return ((reg_value & 0x3f) << 1) + TEMP_MIN; +} + +static inline u8 temp_to_reg(u8 temp) +{ + return ((temp - TEMP_MIN) >> 1) & 0x3f; +} + +static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev) +{ + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + + return cbe_get_pmd_regs(spu_devnode(spu)); +} + +/* returns the value for a given spu in a given register */ +static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg) +{ + union spe_reg value; + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + value.val = in_be64(®->val); + + return value.spe[spu->spe_id]; +} + +static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 value; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = get_pmd_regs(dev); + + value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1); + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos) +{ + u64 value; + + value = in_be64(&pmd_regs->tm_tpr.val); + /* access the corresponding byte */ + value >>= pos; + value &= 0x3F; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos) +{ + u64 reg_value; + unsigned int temp; + u64 new_value; + int ret; + + ret = sscanf(buf, "%u", &temp); + + if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX) + return -EINVAL; + + new_value = temp_to_reg(temp); + + reg_value = in_be64(&pmd_regs->tm_tpr.val); + + /* zero out bits for new value */ + reg_value &= ~(0xffull << pos); + /* set bits to new value */ + reg_value |= new_value << pos; + + out_be64(&pmd_regs->tm_tpr.val, reg_value); + return size; +} + +static ssize_t spu_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 0); +} + +static ssize_t spu_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 8); +} + +static ssize_t spu_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 16); +} + +static ssize_t spu_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 0); +} + +static ssize_t spu_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 8); +} + +static ssize_t spu_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 16); +} + +static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + u64 value; + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + value = in_be64(&pmd_regs->ts_ctsr2); + + value = (value >> pos) & 0x3f; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + + +/* shows the temperature of the DTS on the PPE, + * located near the linear thermal sensor */ +static ssize_t ppe_show_temp0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 32); +} + +/* shows the temperature of the second DTS on the PPE */ +static ssize_t ppe_show_temp1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 0); +} + +static ssize_t ppe_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32); +} + +static ssize_t ppe_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40); +} + +static ssize_t ppe_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48); +} + +static ssize_t ppe_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32); +} + +static ssize_t ppe_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40); +} + +static ssize_t ppe_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48); +} + + +static struct device_attribute attr_spu_temperature = { + .attr = {.name = "temperature", .mode = 0400 }, + .show = spu_show_temp, +}; + +static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600); + + +static struct attribute *spu_attributes[] = { + &attr_spu_temperature.attr, + &attr_spu_throttle_end.attr, + &attr_spu_throttle_begin.attr, + &attr_spu_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group spu_attribute_group = { + .name = "thermal", + .attrs = spu_attributes, +}; + +static struct device_attribute attr_ppe_temperature0 = { + .attr = {.name = "temperature0", .mode = 0400 }, + .show = ppe_show_temp0, +}; + +static struct device_attribute attr_ppe_temperature1 = { + .attr = {.name = "temperature1", .mode = 0400 }, + .show = ppe_show_temp1, +}; + +static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600); + +static struct attribute *ppe_attributes[] = { + &attr_ppe_temperature0.attr, + &attr_ppe_temperature1.attr, + &attr_ppe_throttle_end.attr, + &attr_ppe_throttle_begin.attr, + &attr_ppe_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group ppe_attribute_group = { + .name = "thermal", + .attrs = ppe_attributes, +}; + +/* + * initialize throttling with default values + */ +static int __init init_default_values(void) +{ + int cpu; + struct cbe_pmd_regs __iomem *pmd_regs; + struct device *dev; + union ppe_spe_reg tpr; + union spe_reg str1; + u64 str2; + union spe_reg cr1; + u64 cr2; + + /* TPR defaults */ + /* ppe + * 1F - no full stop + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees */ + tpr.ppe = 0x1F0803; + /* spe + * 10 - full stopped when over 96 degrees + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees + */ + tpr.spe = 0x100803; + + /* STR defaults */ + /* str1 + * 10 - stop 16 of 32 cycles + */ + str1.val = 0x1010101010101010ull; + /* str2 + * 10 - stop 16 of 32 cycles + */ + str2 = 0x10; + + /* CR defaults */ + /* cr1 + * 4 - normal operation + */ + cr1.val = 0x0404040404040404ull; + /* cr2 + * 4 - normal operation + */ + cr2 = 0x04; + + for_each_possible_cpu (cpu) { + pr_debug("processing cpu %d\n", cpu); + dev = get_cpu_device(cpu); + + if (!dev) { + pr_info("invalid dev pointer for cbe_thermal\n"); + return -EINVAL; + } + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + + if (!pmd_regs) { + pr_info("invalid CBE regs pointer for cbe_thermal\n"); + return -EINVAL; + } + + out_be64(&pmd_regs->tm_str2, str2); + out_be64(&pmd_regs->tm_str1.val, str1.val); + out_be64(&pmd_regs->tm_tpr.val, tpr.val); + out_be64(&pmd_regs->tm_cr1.val, cr1.val); + out_be64(&pmd_regs->tm_cr2, cr2); + } + + return 0; +} + + +static int __init thermal_init(void) +{ + int rc = init_default_values(); + + if (rc == 0) { + spu_add_dev_attr_group(&spu_attribute_group); + cpu_add_dev_attr_group(&ppe_attribute_group); + } + + return rc; +} +module_init(thermal_init); + +static void __exit thermal_exit(void) +{ + spu_remove_dev_attr_group(&spu_attribute_group); + cpu_remove_dev_attr_group(&ppe_attribute_group); +} +module_exit(thermal_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/kernel/arch/powerpc/platforms/cell/cell.h b/kernel/arch/powerpc/platforms/cell/cell.h new file mode 100644 index 000000000..ef143dfee --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/cell.h @@ -0,0 +1,24 @@ +/* + * Cell Platform common data structures + * + * Copyright 2015, Daniel Axtens, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef CELL_H +#define CELL_H + +#include <asm/pci-bridge.h> + +extern struct pci_controller_ops cell_pci_controller_ops; + +#endif diff --git a/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c new file mode 100644 index 000000000..82607d621 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -0,0 +1,171 @@ +/* + * spu aware cpufreq governor for the cell processor + * + * © Copyright IBM Corporation 2006-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/cpufreq.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/atomic.h> +#include <asm/machdep.h> +#include <asm/spu.h> + +#define POLL_TIME 100000 /* in µs */ +#define EXP 753 /* exp(-1) in fixed-point */ + +struct spu_gov_info_struct { + unsigned long busy_spus; /* fixed-point */ + struct cpufreq_policy *policy; + struct delayed_work work; + unsigned int poll_int; /* µs */ +}; +static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info); + +static int calc_freq(struct spu_gov_info_struct *info) +{ + int cpu; + int busy_spus; + + cpu = info->policy->cpu; + busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus); + + CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1); + pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n", + cpu, busy_spus, info->busy_spus); + + return info->policy->max * info->busy_spus / FIXED_1; +} + +static void spu_gov_work(struct work_struct *work) +{ + struct spu_gov_info_struct *info; + int delay; + unsigned long target_freq; + + info = container_of(work, struct spu_gov_info_struct, work.work); + + /* after cancel_delayed_work_sync we unset info->policy */ + BUG_ON(info->policy == NULL); + + target_freq = calc_freq(info); + __cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H); + + delay = usecs_to_jiffies(info->poll_int); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_init_work(struct spu_gov_info_struct *info) +{ + int delay = usecs_to_jiffies(info->poll_int); + INIT_DEFERRABLE_WORK(&info->work, spu_gov_work); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_cancel_work(struct spu_gov_info_struct *info) +{ + cancel_delayed_work_sync(&info->work); +} + +static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info, *affected_info; + int i; + int ret = 0; + + info = &per_cpu(spu_gov_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + ret = -EINVAL; + break; + } + + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + ret = -EINVAL; + break; + } + + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } + + info->poll_int = POLL_TIME; + + /* setup timer */ + spu_gov_init_work(info); + + break; + + case CPUFREQ_GOV_STOP: + /* cancel timer */ + spu_gov_cancel_work(info); + + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; + } + + break; + } + + return ret; +} + +static struct cpufreq_governor spu_governor = { + .name = "spudemand", + .governor = spu_gov_govern, + .owner = THIS_MODULE, +}; + +/* + * module init and destoy + */ + +static int __init spu_gov_init(void) +{ + int ret; + + ret = cpufreq_register_governor(&spu_governor); + if (ret) + printk(KERN_ERR "registration of governor failed\n"); + return ret; +} + +static void __exit spu_gov_exit(void) +{ + cpufreq_unregister_governor(&spu_governor); +} + + +module_init(spu_gov_init); +module_exit(spu_gov_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.c b/kernel/arch/powerpc/platforms/cell/interrupt.c new file mode 100644 index 000000000..3af8324c1 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/interrupt.c @@ -0,0 +1,411 @@ +/* + * Cell Internal Interrupt Controller + * + * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * TODO: + * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers + * vs node numbers in the setup code + * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from + * a non-active node to the active node) + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/export.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/machdep.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +struct iic { + struct cbe_iic_thread_regs __iomem *regs; + u8 target_id; + u8 eoi_stack[16]; + int eoi_ptr; + struct device_node *node; +}; + +static DEFINE_PER_CPU(struct iic, cpu_iic); +#define IIC_NODE_COUNT 2 +static struct irq_domain *iic_host; + +/* Convert between "pending" bits and hw irq number */ +static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits) +{ + unsigned char unit = bits.source & 0xf; + unsigned char node = bits.source >> 4; + unsigned char class = bits.class & 3; + + /* Decode IPIs */ + if (bits.flags & CBE_IIC_IRQ_IPI) + return IIC_IRQ_TYPE_IPI | (bits.prio >> 4); + else + return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit; +} + +static void iic_mask(struct irq_data *d) +{ +} + +static void iic_unmask(struct irq_data *d) +{ +} + +static void iic_eoi(struct irq_data *d) +{ + struct iic *iic = this_cpu_ptr(&cpu_iic); + out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); + BUG_ON(iic->eoi_ptr < 0); +} + +static struct irq_chip iic_chip = { + .name = "CELL-IIC", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_eoi, +}; + + +static void iic_ioexc_eoi(struct irq_data *d) +{ +} + +static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct cbe_iic_regs __iomem *node_iic = + (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; + unsigned long bits, ack; + int cascade; + + for (;;) { + bits = in_be64(&node_iic->iic_is); + if (bits == 0) + break; + /* pre-ack edge interrupts */ + ack = bits & IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + /* handle them */ + for (cascade = 63; cascade >= 0; cascade--) + if (bits & (0x8000000000000000UL >> cascade)) { + unsigned int cirq = + irq_linear_revmap(iic_host, + base | cascade); + if (cirq != NO_IRQ) + generic_handle_irq(cirq); + } + /* post-ack level interrupts */ + ack = bits & ~IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + } + chip->irq_eoi(&desc->irq_data); +} + + +static struct irq_chip iic_ioexc_chip = { + .name = "CELL-IOEX", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_ioexc_eoi, +}; + +/* Get an IRQ number from the pending state register of the IIC */ +static unsigned int iic_get_irq(void) +{ + struct cbe_iic_pending_bits pending; + struct iic *iic; + unsigned int virq; + + iic = this_cpu_ptr(&cpu_iic); + *(unsigned long *) &pending = + in_be64((u64 __iomem *) &iic->regs->pending_destr); + if (!(pending.flags & CBE_IIC_IRQ_VALID)) + return NO_IRQ; + virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); + if (virq == NO_IRQ) + return NO_IRQ; + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); + return virq; +} + +void iic_setup_cpu(void) +{ + out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff); +} + +u8 iic_get_target_id(int cpu) +{ + return per_cpu(cpu_iic, cpu).target_id; +} + +EXPORT_SYMBOL_GPL(iic_get_target_id); + +#ifdef CONFIG_SMP + +/* Use the highest interrupt priorities for IPI */ +static inline int iic_msg_to_irq(int msg) +{ + return IIC_IRQ_TYPE_IPI + 0xf - msg; +} + +void iic_message_pass(int cpu, int msg) +{ + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); +} + +struct irq_domain *iic_get_irq_host(int node) +{ + return iic_host; +} +EXPORT_SYMBOL_GPL(iic_get_irq_host); + +static void iic_request_ipi(int msg) +{ + int virq; + + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); + if (virq == NO_IRQ) { + printk(KERN_ERR + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); + return; + } + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); +} + +void iic_request_IPIs(void) +{ + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_TICK_BROADCAST); + iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); +} + +#endif /* CONFIG_SMP */ + + +static int iic_host_match(struct irq_domain *h, struct device_node *node) +{ + return of_device_is_compatible(node, + "IBM,CBEA-Internal-Interrupt-Controller"); +} + +static int iic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + switch (hw & IIC_IRQ_TYPE_MASK) { + case IIC_IRQ_TYPE_IPI: + irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq); + break; + case IIC_IRQ_TYPE_IOEXC: + irq_set_chip_and_handler(virq, &iic_ioexc_chip, + handle_edge_eoi_irq); + break; + default: + irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq); + } + return 0; +} + +static int iic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + unsigned int node, ext, unit, class; + const u32 *val; + + if (!of_device_is_compatible(ct, + "IBM,CBEA-Internal-Interrupt-Controller")) + return -ENODEV; + if (intsize != 1) + return -ENODEV; + val = of_get_property(ct, "#interrupt-cells", NULL); + if (val == NULL || *val != 1) + return -ENODEV; + + node = intspec[0] >> 24; + ext = (intspec[0] >> 16) & 0xff; + class = (intspec[0] >> 8) & 0xff; + unit = intspec[0] & 0xff; + + /* Check if node is in supported range */ + if (node > 1) + return -EINVAL; + + /* Build up interrupt number, special case for IO exceptions */ + *out_hwirq = (node << IIC_IRQ_NODE_SHIFT); + if (unit == IIC_UNIT_IIC && class == 1) + *out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext; + else + *out_hwirq |= IIC_IRQ_TYPE_NORMAL | + (class << IIC_IRQ_CLASS_SHIFT) | unit; + + /* Dummy flags, ignored by iic code */ + *out_flags = IRQ_TYPE_EDGE_RISING; + + return 0; +} + +static const struct irq_domain_ops iic_host_ops = { + .match = iic_host_match, + .map = iic_host_map, + .xlate = iic_host_xlate, +}; + +static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr, + struct device_node *node) +{ + /* XXX FIXME: should locate the linux CPU number from the HW cpu + * number properly. We are lucky for now + */ + struct iic *iic = &per_cpu(cpu_iic, hw_cpu); + + iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs)); + BUG_ON(iic->regs == NULL); + + iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe); + iic->eoi_stack[0] = 0xff; + iic->node = of_node_get(node); + out_be64(&iic->regs->prio, 0); + + printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n", + hw_cpu, iic->target_id, node->full_name); +} + +static int __init setup_iic(void) +{ + struct device_node *dn; + struct resource r0, r1; + unsigned int node, cascade, found = 0; + struct cbe_iic_regs __iomem *node_iic; + const u32 *np; + + for (dn = NULL; + (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + if (!of_device_is_compatible(dn, + "IBM,CBEA-Internal-Interrupt-Controller")) + continue; + np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL); + if (np == NULL) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + of_node_put(dn); + return -ENODEV; + } + if (of_address_to_resource(dn, 0, &r0) || + of_address_to_resource(dn, 1, &r1)) { + printk(KERN_WARNING "IIC: Can't resolve addresses\n"); + of_node_put(dn); + return -ENODEV; + } + found++; + init_one_iic(np[0], r0.start, dn); + init_one_iic(np[1], r1.start, dn); + + /* Setup cascade for IO exceptions. XXX cleanup tricks to get + * node vs CPU etc... + * Note that we configure the IIC_IRR here with a hard coded + * priority of 1. We might want to improve that later. + */ + node = np[0] >> 1; + node_iic = cbe_get_cpu_iic_regs(np[0]); + cascade = node << IIC_IRQ_NODE_SHIFT; + cascade |= 1 << IIC_IRQ_CLASS_SHIFT; + cascade |= IIC_UNIT_IIC; + cascade = irq_create_mapping(iic_host, cascade); + if (cascade == NO_IRQ) + continue; + /* + * irq_data is a generic pointer that gets passed back + * to us later, so the forced cast is fine. + */ + irq_set_handler_data(cascade, (void __force *)node_iic); + irq_set_chained_handler(cascade, iic_ioexc_cascade); + out_be64(&node_iic->iic_ir, + (1 << 12) /* priority */ | + (node << 4) /* dest node */ | + IIC_UNIT_THREAD_0 /* route them to thread 0 */); + /* Flush pending (make sure it triggers if there is + * anything pending + */ + out_be64(&node_iic->iic_is, 0xfffffffffffffffful); + } + + if (found) + return 0; + else + return -ENODEV; +} + +void __init iic_init_IRQ(void) +{ + /* Setup an irq host data structure */ + iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops, + NULL); + BUG_ON(iic_host == NULL); + irq_set_default_host(iic_host); + + /* Discover and initialize iics */ + if (setup_iic() < 0) + panic("IIC: Failed to initialize !\n"); + + /* Set master interrupt handling function */ + ppc_md.get_irq = iic_get_irq; + + /* Enable on current CPU */ + iic_setup_cpu(); +} + +void iic_set_interrupt_routing(int cpu, int thread, int priority) +{ + struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu); + u64 iic_ir = 0; + int node = cpu >> 1; + + /* Set which node and thread will handle the next interrupt */ + iic_ir |= CBE_IIC_IR_PRIO(priority) | + CBE_IIC_IR_DEST_NODE(node); + if (thread == 0) + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0); + else + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1); + out_be64(&iic_regs->iic_ir, iic_ir); +} diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.h b/kernel/arch/powerpc/platforms/cell/interrupt.h new file mode 100644 index 000000000..4f60ae6ca --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/interrupt.h @@ -0,0 +1,89 @@ +#ifndef ASM_CELL_PIC_H +#define ASM_CELL_PIC_H +#ifdef __KERNEL__ +/* + * Mapping of IIC pending bits into per-node interrupt numbers. + * + * Interrupt numbers are in the range 0...0x1ff where the top bit + * (0x100) represent the source node. Only 2 nodes are supported with + * the current code though it's trivial to extend that if necessary using + * higher level bits + * + * The bottom 8 bits are split into 2 type bits and 6 data bits that + * depend on the type: + * + * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source + * 01 (0x40 | data) : IO exception. data is the exception number as + * defined by bit numbers in IIC_SR + * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority) + * and node is always 0 (IPIs are per-cpu, their source is + * not relevant) + * 11 (0xc0 | data) : reserved + * + * In addition, interrupt number 0x80000000 is defined as always invalid + * (that is the node field is expected to never extend to move than 23 bits) + * + */ + +enum { + IIC_IRQ_INVALID = 0x80000000u, + IIC_IRQ_NODE_MASK = 0x100, + IIC_IRQ_NODE_SHIFT = 8, + IIC_IRQ_MAX = 0x1ff, + IIC_IRQ_TYPE_MASK = 0xc0, + IIC_IRQ_TYPE_NORMAL = 0x00, + IIC_IRQ_TYPE_IOEXC = 0x40, + IIC_IRQ_TYPE_IPI = 0x80, + IIC_IRQ_CLASS_SHIFT = 4, + IIC_IRQ_CLASS_0 = 0x00, + IIC_IRQ_CLASS_1 = 0x10, + IIC_IRQ_CLASS_2 = 0x20, + IIC_SOURCE_COUNT = 0x200, + + /* Here are defined the various source/dest units. Avoid using those + * definitions if you can, they are mostly here for reference + */ + IIC_UNIT_SPU_0 = 0x4, + IIC_UNIT_SPU_1 = 0x7, + IIC_UNIT_SPU_2 = 0x3, + IIC_UNIT_SPU_3 = 0x8, + IIC_UNIT_SPU_4 = 0x2, + IIC_UNIT_SPU_5 = 0x9, + IIC_UNIT_SPU_6 = 0x1, + IIC_UNIT_SPU_7 = 0xa, + IIC_UNIT_IOC_0 = 0x0, + IIC_UNIT_IOC_1 = 0xb, + IIC_UNIT_THREAD_0 = 0xe, /* target only */ + IIC_UNIT_THREAD_1 = 0xf, /* target only */ + IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */ + + /* Base numbers for the external interrupts */ + IIC_IRQ_EXT_IOIF0 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0, + IIC_IRQ_EXT_IOIF1 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1, + + /* Base numbers for the IIC_ISR interrupts */ + IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63, + IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62, + IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61, + IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60, + IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59, + + /* Which bits in IIC_ISR are edge sensitive */ + IIC_ISR_EDGE_MASK = 0x4ul, +}; + +extern void iic_init_IRQ(void); +extern void iic_message_pass(int cpu, int msg); +extern void iic_request_IPIs(void); +extern void iic_setup_cpu(void); + +extern u8 iic_get_target_id(int cpu); + +extern void spider_init_IRQ(void); + +extern void iic_set_interrupt_routing(int cpu, int thread, int priority); + +#endif +#endif /* ASM_CELL_PIC_H */ diff --git a/kernel/arch/powerpc/platforms/cell/iommu.c b/kernel/arch/powerpc/platforms/cell/iommu.c new file mode 100644 index 000000000..21b502398 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/iommu.c @@ -0,0 +1,1237 @@ +/* + * IOMMU implementation for Cell Broadband Processor Architecture + * + * (C) Copyright IBM Corporation 2006-2008 + * + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/memblock.h> + +#include <asm/prom.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/udbg.h> +#include <asm/firmware.h> +#include <asm/cell-regs.h> + +#include "cell.h" +#include "interrupt.h" + +/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages + * instead of leaving them mapped to some dummy page. This can be + * enabled once the appropriate workarounds for spider bugs have + * been enabled + */ +#define CELL_IOMMU_REAL_UNMAP + +/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of + * IO PTEs based on the transfer direction. That can be enabled + * once spider-net has been fixed to pass the correct direction + * to the DMA mapping functions + */ +#define CELL_IOMMU_STRICT_PROTECTION + + +#define NR_IOMMUS 2 + +/* IOC mmap registers */ +#define IOC_Reg_Size 0x2000 + +#define IOC_IOPT_CacheInvd 0x908 +#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul +#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul +#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul + +#define IOC_IOST_Origin 0x918 +#define IOC_IOST_Origin_E 0x8000000000000000ul +#define IOC_IOST_Origin_HW 0x0000000000000800ul +#define IOC_IOST_Origin_HL 0x0000000000000400ul + +#define IOC_IO_ExcpStat 0x920 +#define IOC_IO_ExcpStat_V 0x8000000000000000ul +#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul +#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul +#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul +#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful + +#define IOC_IO_ExcpMask 0x928 +#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul +#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul + +#define IOC_IOCmd_Offset 0x1000 + +#define IOC_IOCmd_Cfg 0xc00 +#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul + + +/* Segment table entries */ +#define IOSTE_V 0x8000000000000000ul /* valid */ +#define IOSTE_H 0x4000000000000000ul /* cache hint */ +#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ +#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */ +#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */ +#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */ +#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */ +#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */ +#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */ + + +/* IOMMU sizing */ +#define IO_SEGMENT_SHIFT 28 +#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift)) + +/* The high bit needs to be set on every DMA address */ +#define SPIDER_DMA_OFFSET 0x80000000ul + +struct iommu_window { + struct list_head list; + struct cbe_iommu *iommu; + unsigned long offset; + unsigned long size; + unsigned int ioid; + struct iommu_table table; +}; + +#define NAMESIZE 8 +struct cbe_iommu { + int nid; + char name[NAMESIZE]; + void __iomem *xlate_regs; + void __iomem *cmd_regs; + unsigned long *stab; + unsigned long *ptab; + void *pad_page; + struct list_head windows; +}; + +/* Static array of iommus, one per node + * each contains a list of windows, keyed from dma_window property + * - on bus setup, look for a matching window, or create one + * - on dev setup, assign iommu_table ptr + */ +static struct cbe_iommu iommus[NR_IOMMUS]; +static int cbe_nr_iommus; + +static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, + long n_ptes) +{ + u64 __iomem *reg; + u64 val; + long n; + + reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; + + while (n_ptes > 0) { + /* we can invalidate up to 1 << 11 PTEs at once */ + n = min(n_ptes, 1l << 11); + val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask) + | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask) + | IOC_IOPT_CacheInvd_Busy; + + out_be64(reg, val); + while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy) + ; + + n_ptes -= n; + pte += n; + } +} + +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int i; + unsigned long *io_pte, base_pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + /* implementing proper protection causes problems with the spidernet + * driver - check mapping directions later, but allow read & write by + * default for now.*/ +#ifdef CELL_IOMMU_STRICT_PROTECTION + /* to avoid referencing a global, we use a trick here to setup the + * protection bit. "prot" is setup to be 3 fields of 4 bits apprended + * together for each of the 3 supported direction values. It is then + * shifted left so that the fields matching the desired direction + * lands on the appropriate bits, and other bits are masked out. + */ + const unsigned long prot = 0xc48; + base_pte = + ((prot << (52 + 4 * direction)) & + (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) | + CBE_IOPTE_M | CBE_IOPTE_SO_RW | + (window->ioid & CBE_IOPTE_IOID_Mask); +#else + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))) + base_pte &= ~CBE_IOPTE_SO_RW; + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift)) + io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); + + pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", + index, npages, direction, base_pte); + return 0; +} + +static void tce_free_cell(struct iommu_table *tbl, long index, long npages) +{ + + int i; + unsigned long *io_pte, pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages); + +#ifdef CELL_IOMMU_REAL_UNMAP + pte = 0; +#else + /* spider bridge does PCI reads after freeing - insert a mapping + * to a scratch page instead of an invalid entry */ + pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | + __pa(window->iommu->pad_page) | + (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++) + io_pte[i] = pte; + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); +} + +static irqreturn_t ioc_interrupt(int irq, void *data) +{ + unsigned long stat, spf; + struct cbe_iommu *iommu = data; + + stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + spf = stat & IOC_IO_ExcpStat_SPF_Mask; + + /* Might want to rate limit it */ + printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat); + printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n", + !!(stat & IOC_IO_ExcpStat_V), + (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ', + (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ', + (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write", + (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask)); + printk(KERN_ERR " page=0x%016lx\n", + stat & IOC_IO_ExcpStat_ADDR_Mask); + + /* clear interrupt */ + stat &= ~IOC_IO_ExcpStat_V; + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); + + return IRQ_HANDLED; +} + +static int cell_iommu_find_ioc(int nid, unsigned long *base) +{ + struct device_node *np; + struct resource r; + + *base = 0; + + /* First look for new style /be nodes */ + for_each_node_by_name(np, "ioc") { + if (of_node_to_nid(np) != nid) + continue; + if (of_address_to_resource(np, 0, &r)) { + printk(KERN_ERR "iommu: can't get address for %s\n", + np->full_name); + continue; + } + *base = r.start; + of_node_put(np); + return 0; + } + + /* Ok, let's try the old way */ + for_each_node_by_type(np, "cpu") { + const unsigned int *nidp; + const unsigned long *tmp; + + nidp = of_get_property(np, "node-id", NULL); + if (nidp && *nidp == nid) { + tmp = of_get_property(np, "ioc-translation", NULL); + if (tmp) { + *base = *tmp; + of_node_put(np); + return 0; + } + } + } + + return -ENODEV; +} + +static void cell_iommu_setup_stab(struct cbe_iommu *iommu, + unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + struct page *page; + unsigned long segments, stab_size; + + segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; + + pr_debug("%s: iommu[%d]: segments: %lu\n", + __func__, iommu->nid, segments); + + /* set up the segment table */ + stab_size = segments * sizeof(unsigned long); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size)); + BUG_ON(!page); + iommu->stab = page_address(page); + memset(iommu->stab, 0, stab_size); +} + +static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, + unsigned long base, unsigned long size, unsigned long gap_base, + unsigned long gap_size, unsigned long page_shift) +{ + struct page *page; + int i; + unsigned long reg, segments, pages_per_segment, ptab_size, + n_pte_pages, start_seg, *ptab; + + start_seg = base >> IO_SEGMENT_SHIFT; + segments = size >> IO_SEGMENT_SHIFT; + pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); + /* PTEs for each segment must start on a 4K bounday */ + pages_per_segment = max(pages_per_segment, + (1 << 12) / sizeof(unsigned long)); + + ptab_size = segments * pages_per_segment * sizeof(unsigned long); + pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__, + iommu->nid, ptab_size, get_order(ptab_size)); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size)); + BUG_ON(!page); + + ptab = page_address(page); + memset(ptab, 0, ptab_size); + + /* number of 4K pages needed for a page table */ + n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12; + + pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n", + __func__, iommu->nid, iommu->stab, ptab, + n_pte_pages); + + /* initialise the STEs */ + reg = IOSTE_V | ((n_pte_pages - 1) << 5); + + switch (page_shift) { + case 12: reg |= IOSTE_PS_4K; break; + case 16: reg |= IOSTE_PS_64K; break; + case 20: reg |= IOSTE_PS_1M; break; + case 24: reg |= IOSTE_PS_16M; break; + default: BUG(); + } + + gap_base = gap_base >> IO_SEGMENT_SHIFT; + gap_size = gap_size >> IO_SEGMENT_SHIFT; + + pr_debug("Setting up IOMMU stab:\n"); + for (i = start_seg; i < (start_seg + segments); i++) { + if (i >= gap_base && i < (gap_base + gap_size)) { + pr_debug("\toverlap at %d, skipping\n", i); + continue; + } + iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) * + (i - start_seg)); + pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); + } + + return ptab; +} + +static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) +{ + int ret; + unsigned long reg, xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __func__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + + /* ensure that the STEs have updated */ + mb(); + + /* setup interrupts for the iommu. */ + reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, + reg & ~IOC_IO_ExcpStat_V); + out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, + IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); + + virq = irq_create_mapping(NULL, + IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); + BUG_ON(virq == NO_IRQ); + + ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu); + BUG_ON(ret); + + /* set the IOC segment table origin register (and turn on the iommu) */ + reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; + out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); + in_be64(iommu->xlate_regs + IOC_IOST_Origin); + + /* turn on IO translation */ + reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; + out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); +} + +static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, + unsigned long base, unsigned long size) +{ + cell_iommu_setup_stab(iommu, base, size, 0, 0); + iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_enable_hardware(iommu); +} + +#if 0/* Unused for now */ +static struct iommu_window *find_window(struct cbe_iommu *iommu, + unsigned long offset, unsigned long size) +{ + struct iommu_window *window; + + /* todo: check for overlapping (but not equal) windows) */ + + list_for_each_entry(window, &(iommu->windows), list) { + if (window->offset == offset && window->size == size) + return window; + } + + return NULL; +} +#endif + +static inline u32 cell_iommu_get_ioid(struct device_node *np) +{ + const u32 *ioid; + + ioid = of_get_property(np, "ioid", NULL); + if (ioid == NULL) { + printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", + np->full_name); + return 0; + } + + return *ioid; +} + +static struct iommu_window * __init +cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, + unsigned long offset, unsigned long size, + unsigned long pte_offset) +{ + struct iommu_window *window; + struct page *page; + u32 ioid; + + ioid = cell_iommu_get_ioid(np); + + window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); + BUG_ON(window == NULL); + + window->offset = offset; + window->size = size; + window->ioid = ioid; + window->iommu = iommu; + + window->table.it_blocksize = 16; + window->table.it_base = (unsigned long)iommu->ptab; + window->table.it_index = iommu->nid; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; + + iommu_init_table(&window->table, iommu->nid); + + pr_debug("\tioid %d\n", window->ioid); + pr_debug("\tblocksize %ld\n", window->table.it_blocksize); + pr_debug("\tbase 0x%016lx\n", window->table.it_base); + pr_debug("\toffset 0x%lx\n", window->table.it_offset); + pr_debug("\tsize %ld\n", window->table.it_size); + + list_add(&window->list, &iommu->windows); + + if (offset != 0) + return window; + + /* We need to map and reserve the first IOMMU page since it's used + * by the spider workaround. In theory, we only need to do that when + * running on spider but it doesn't really matter. + * + * This code also assumes that we have a window that starts at 0, + * which is the case on all spider based blades. + */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->pad_page = page_address(page); + clear_page(iommu->pad_page); + + __set_bit(0, window->table.it_map); + tce_build_cell(&window->table, window->table.it_offset, 1, + (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL); + + return window; +} + +static struct cbe_iommu *cell_iommu_for_node(int nid) +{ + int i; + + for (i = 0; i < cbe_nr_iommus; i++) + if (iommus[i].nid == nid) + return &iommus[i]; + return NULL; +} + +static unsigned long cell_dma_direct_offset; + +static unsigned long dma_iommu_fixed_base; + +/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ +static int iommu_fixed_is_weak; + +static struct iommu_table *cell_get_iommu_table(struct device *dev) +{ + struct iommu_window *window; + struct cbe_iommu *iommu; + + /* Current implementation uses the first window available in that + * node's iommu. We -might- do something smarter later though it may + * never be necessary + */ + iommu = cell_iommu_for_node(dev_to_node(dev)); + if (iommu == NULL || list_empty(&iommu->windows)) { + dev_err(dev, "iommu: missing iommu for %s (node %d)\n", + of_node_full_name(dev->of_node), dev_to_node(dev)); + return NULL; + } + window = list_entry(iommu->windows.next, struct iommu_window, list); + + return &window->table; +} + +/* A coherent allocation implies strong ordering */ + +static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak) + return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), + size, dma_handle, + device_to_mask(dev), flag, + dev_to_node(dev)); + else + return dma_direct_ops.alloc(dev, size, dma_handle, flag, + attrs); +} + +static void dma_fixed_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak) + iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, + dma_handle); + else + dma_direct_ops.free(dev, size, vaddr, dma_handle, attrs); +} + +static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_page(dev, page, offset, size, + direction, attrs); + else + return iommu_map_page(dev, cell_get_iommu_table(dev), page, + offset, size, device_to_mask(dev), + direction, attrs); +} + +static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_page(dev, dma_addr, size, direction, + attrs); + else + iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size, + direction, attrs); +} + +static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); + else + return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, + nents, device_to_mask(dev), + direction, attrs); +} + +static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); + else + ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, + direction, attrs); +} + +static int dma_fixed_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); + +struct dma_map_ops dma_iommu_fixed_ops = { + .alloc = dma_fixed_alloc_coherent, + .free = dma_fixed_free_coherent, + .map_sg = dma_fixed_map_sg, + .unmap_sg = dma_fixed_unmap_sg, + .dma_supported = dma_fixed_dma_supported, + .set_dma_mask = dma_set_mask_and_switch, + .map_page = dma_fixed_map_page, + .unmap_page = dma_fixed_unmap_page, +}; + +static void cell_dma_dev_setup_fixed(struct device *dev); + +static void cell_dma_dev_setup(struct device *dev) +{ + /* Order is important here, these are not mutually exclusive */ + if (get_dma_ops(dev) == &dma_iommu_fixed_ops) + cell_dma_dev_setup_fixed(dev); + else if (get_pci_dma_ops() == &dma_iommu_ops) + set_iommu_table_base(dev, cell_get_iommu_table(dev)); + else if (get_pci_dma_ops() == &dma_direct_ops) + set_dma_offset(dev, cell_dma_direct_offset); + else + BUG(); +} + +static void cell_pci_dma_dev_setup(struct pci_dev *dev) +{ + cell_dma_dev_setup(&dev->dev); +} + +static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; + + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + /* We use the PCI DMA ops */ + dev->archdata.dma_ops = get_pci_dma_ops(); + + cell_dma_dev_setup(dev); + + return 0; +} + +static struct notifier_block cell_of_bus_notifier = { + .notifier_call = cell_of_bus_notify +}; + +static int __init cell_iommu_get_window(struct device_node *np, + unsigned long *base, + unsigned long *size) +{ + const __be32 *dma_window; + unsigned long index; + + /* Use ibm,dma-window if available, else, hard code ! */ + dma_window = of_get_property(np, "ibm,dma-window", NULL); + if (dma_window == NULL) { + *base = 0; + *size = 0x80000000u; + return -ENODEV; + } + + of_parse_dma_window(np, dma_window, &index, base, size); + return 0; +} + +static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) +{ + struct cbe_iommu *iommu; + int nid, i; + + /* Get node ID */ + nid = of_node_to_nid(np); + if (nid < 0) { + printk(KERN_ERR "iommu: failed to get node for %s\n", + np->full_name); + return NULL; + } + pr_debug("iommu: setting up iommu for node %d (%s)\n", + nid, np->full_name); + + /* XXX todo: If we can have multiple windows on the same IOMMU, which + * isn't the case today, we probably want here to check whether the + * iommu for that node is already setup. + * However, there might be issue with getting the size right so let's + * ignore that for now. We might want to completely get rid of the + * multiple window support since the cell iommu supports per-page ioids + */ + + if (cbe_nr_iommus >= NR_IOMMUS) { + printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n", + np->full_name); + return NULL; + } + + /* Init base fields */ + i = cbe_nr_iommus++; + iommu = &iommus[i]; + iommu->stab = NULL; + iommu->nid = nid; + snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); + INIT_LIST_HEAD(&iommu->windows); + + return iommu; +} + +static void __init cell_iommu_init_one(struct device_node *np, + unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; + + iommu = cell_iommu_alloc(np); + if (!iommu) + return; + + /* Obtain a window for it */ + cell_iommu_get_window(np, &base, &size); + + pr_debug("\ttranslating window 0x%lx...0x%lx\n", + base, base + size - 1); + + /* Initialize the hardware */ + cell_iommu_setup_hardware(iommu, base, size); + + /* Setup the iommu_table */ + cell_iommu_setup_window(iommu, np, base, size, + offset >> IOMMU_PAGE_SHIFT_4K); +} + +static void __init cell_disable_iommus(void) +{ + int node; + unsigned long base, val; + void __iomem *xregs, *cregs; + + /* Make sure IOC translation is disabled on all nodes */ + for_each_online_node(node) { + if (cell_iommu_find_ioc(node, &base)) + continue; + xregs = ioremap(base, IOC_Reg_Size); + if (xregs == NULL) + continue; + cregs = xregs + IOC_IOCmd_Offset; + + pr_debug("iommu: cleaning up iommu on node %d\n", node); + + out_be64(xregs + IOC_IOST_Origin, 0); + (void)in_be64(xregs + IOC_IOST_Origin); + val = in_be64(cregs + IOC_IOCmd_Cfg); + val &= ~IOC_IOCmd_Cfg_TE; + out_be64(cregs + IOC_IOCmd_Cfg, val); + (void)in_be64(cregs + IOC_IOCmd_Cfg); + + iounmap(xregs); + } +} + +static int __init cell_iommu_init_disabled(void) +{ + struct device_node *np = NULL; + unsigned long base = 0, size; + + /* When no iommu is present, we use direct DMA ops */ + set_pci_dma_ops(&dma_direct_ops); + + /* First make sure all IOC translation is turned off */ + cell_disable_iommus(); + + /* If we have no Axon, we set up the spider DMA magic offset */ + if (of_find_node_by_name(NULL, "axon") == NULL) + cell_dma_direct_offset = SPIDER_DMA_OFFSET; + + /* Now we need to check to see where the memory is mapped + * in PCI space. We assume that all busses use the same dma + * window which is always the case so far on Cell, thus we + * pick up the first pci-internal node we can find and check + * the DMA window from there. + */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + if (np == NULL) { + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + } + of_node_put(np); + + /* If we found a DMA window, we check if it's big enough to enclose + * all of physical memory. If not, we force enable IOMMU + */ + if (np && size < memblock_end_of_DRAM()) { + printk(KERN_WARNING "iommu: force-enabled, dma window" + " (%ldMB) smaller than total memory (%lldMB)\n", + size >> 20, memblock_end_of_DRAM() >> 20); + return -ENODEV; + } + + cell_dma_direct_offset += base; + + if (cell_dma_direct_offset != 0) + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + + printk("iommu: disabled, direct DMA offset is 0x%lx\n", + cell_dma_direct_offset); + + return 0; +} + +/* + * Fixed IOMMU mapping support + * + * This code adds support for setting up a fixed IOMMU mapping on certain + * cell machines. For 64-bit devices this avoids the performance overhead of + * mapping and unmapping pages at runtime. 32-bit devices are unable to use + * the fixed mapping. + * + * The fixed mapping is established at boot, and maps all of physical memory + * 1:1 into device space at some offset. On machines with < 30 GB of memory + * we setup the fixed mapping immediately above the normal IOMMU window. + * + * For example a machine with 4GB of memory would end up with the normal + * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In + * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to + * 3GB, plus any offset required by firmware. The firmware offset is encoded + * in the "dma-ranges" property. + * + * On machines with 30GB or more of memory, we are unable to place the fixed + * mapping above the normal IOMMU window as we would run out of address space. + * Instead we move the normal IOMMU window to coincide with the hash page + * table, this region does not need to be part of the fixed mapping as no + * device should ever be DMA'ing to it. We then setup the fixed mapping + * from 0 to 32GB. + */ + +static u64 cell_iommu_get_fixed_address(struct device *dev) +{ + u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + struct device_node *np; + const u32 *ranges = NULL; + int i, len, best, naddr, nsize, pna, range_size; + + np = of_node_get(dev->of_node); + while (1) { + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + np = of_get_next_parent(np); + if (!np) + break; + + ranges = of_get_property(np, "dma-ranges", &len); + + /* Ignore empty ranges, they imply no translation required */ + if (ranges && len > 0) + break; + } + + if (!ranges) { + dev_dbg(dev, "iommu: no dma-ranges found\n"); + goto out; + } + + len /= sizeof(u32); + + pna = of_n_addr_cells(np); + range_size = naddr + nsize + pna; + + /* dma-ranges format: + * child addr : naddr cells + * parent addr : pna cells + * size : nsize cells + */ + for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { + cpu_addr = of_translate_dma_address(np, ranges + i + naddr); + size = of_read_number(ranges + i + naddr + pna, nsize); + + if (cpu_addr == 0 && size > best_size) { + best = i; + best_size = size; + } + } + + if (best >= 0) { + dev_addr = of_read_number(ranges + best, naddr); + } else + dev_dbg(dev, "iommu: no suitable range found!\n"); + +out: + of_node_put(np); + + return dev_addr; +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + if (dma_mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + { + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + set_dma_ops(dev, &dma_iommu_fixed_ops); + } else { + dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); + set_dma_ops(dev, get_pci_dma_ops()); + } + + cell_dma_dev_setup(dev); + + *dev->dma_mask = dma_mask; + + return 0; +} + +static void cell_dma_dev_setup_fixed(struct device *dev) +{ + u64 addr; + + addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; + set_dma_offset(dev, addr); + + dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); +} + +static void insert_16M_pte(unsigned long addr, unsigned long *ptab, + unsigned long base_pte) +{ + unsigned long segment, offset; + + segment = addr >> IO_SEGMENT_SHIFT; + offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24)); + ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long)); + + pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n", + addr, ptab, segment, offset); + + ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); +} + +static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, + struct device_node *np, unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + unsigned long base_pte, uaddr, ioaddr, *ptab; + + ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24); + + dma_iommu_fixed_base = fbase; + + pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase); + + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask); + + if (iommu_fixed_is_weak) + pr_info("IOMMU: Using weak ordering for fixed mapping\n"); + else { + pr_info("IOMMU: Using strong ordering for fixed mapping\n"); + base_pte |= CBE_IOPTE_SO_RW; + } + + for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) { + /* Don't touch the dynamic region */ + ioaddr = uaddr + fbase; + if (ioaddr >= dbase && ioaddr < (dbase + dsize)) { + pr_debug("iommu: fixed/dynamic overlap, skipping\n"); + continue; + } + + insert_16M_pte(uaddr, ptab, base_pte); + } + + mb(); +} + +static int __init cell_iommu_fixed_mapping_init(void) +{ + unsigned long dbase, dsize, fbase, fsize, hbase, hend; + struct cbe_iommu *iommu; + struct device_node *np; + + /* The fixed mapping is only supported on axon machines */ + np = of_find_node_by_name(NULL, "axon"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: fixed mapping disabled, no axons found\n"); + return -1; + } + + /* We must have dma-ranges properties for fixed mapping to work */ + np = of_find_node_with_property(NULL, "dma-ranges"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: no dma-ranges found, no fixed mapping\n"); + return -1; + } + + /* The default setup is to have the fixed mapping sit after the + * dynamic region, so find the top of the largest IOMMU window + * on any axon, then add the size of RAM and that's our max value. + * If that is > 32GB we have to do other shennanigans. + */ + fbase = 0; + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + fbase = max(fbase, dbase + dsize); + } + + fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fsize = memblock_phys_mem_size(); + + if ((fbase + fsize) <= 0x800000000ul) + hbase = 0; /* use the device tree window */ + else { + /* If we're over 32 GB we need to cheat. We can't map all of + * RAM with the fixed mapping, and also fit the dynamic + * region. So try to place the dynamic region where the hash + * table sits, drivers never need to DMA to it, we don't + * need a fixed mapping for that area. + */ + if (!htab_address) { + pr_debug("iommu: htab is NULL, on LPAR? Huh?\n"); + return -1; + } + hbase = __pa(htab_address); + hend = hbase + htab_size_bytes; + + /* The window must start and end on a segment boundary */ + if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + pr_debug("iommu: hash window not segment aligned\n"); + return -1; + } + + /* Check the hash window fits inside the real DMA window */ + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + + if (hbase < dbase || (hend > (dbase + dsize))) { + pr_debug("iommu: hash window doesn't fit in" + "real DMA window\n"); + return -1; + } + } + + fbase = 0; + } + + /* Setup the dynamic regions */ + for_each_node_by_name(np, "axon") { + iommu = cell_iommu_alloc(np); + BUG_ON(!iommu); + + if (hbase == 0) + cell_iommu_get_window(np, &dbase, &dsize); + else { + dbase = hbase; + dsize = htab_size_bytes; + } + + printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx " + "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase, + dbase + dsize, fbase, fbase + fsize); + + cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); + iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, + fbase, fsize); + cell_iommu_enable_hardware(iommu); + cell_iommu_setup_window(iommu, np, dbase, dsize, 0); + } + + dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; + set_pci_dma_ops(&dma_iommu_ops); + + return 0; +} + +static int iommu_fixed_disabled; + +static int __init setup_iommu_fixed(char *str) +{ + struct device_node *pciep; + + if (strcmp(str, "off") == 0) + iommu_fixed_disabled = 1; + + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) + iommu_fixed_is_weak = 1; + + of_node_put(pciep); + + return 1; +} +__setup("iommu_fixed=", setup_iommu_fixed); + +static u64 cell_dma_get_required_mask(struct device *dev) +{ + struct dma_map_ops *dma_ops; + + if (!dev->dma_mask) + return 0; + + if (!iommu_fixed_disabled && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + return DMA_BIT_MASK(64); + + dma_ops = get_dma_ops(dev); + if (dma_ops->get_required_mask) + return dma_ops->get_required_mask(dev); + + WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops); + + return DMA_BIT_MASK(64); +} + +static int __init cell_iommu_init(void) +{ + struct device_node *np; + + /* If IOMMU is disabled or we have little enough RAM to not need + * to enable it, we setup a direct mapping. + * + * Note: should we make sure we have the IOMMU actually disabled ? + */ + if (iommu_is_off || + (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) + if (cell_iommu_init_disabled() == 0) + goto bail; + + /* Setup various callbacks */ + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + ppc_md.dma_get_required_mask = cell_dma_get_required_mask; + ppc_md.tce_build = tce_build_cell; + ppc_md.tce_free = tce_free_cell; + + if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) + goto bail; + + /* Create an iommu for each /axon node. */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, 0); + } + + /* Create an iommu for each toplevel /pci-internal node for + * old hardware/firmware + */ + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, SPIDER_DMA_OFFSET); + } + + /* Setup default PCI iommu ops */ + set_pci_dma_ops(&dma_iommu_ops); + + bail: + /* Register callbacks on OF platform device addition/removal + * to handle linking them to the right DMA operations + */ + bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier); + + return 0; +} +machine_arch_initcall(cell, cell_iommu_init); diff --git a/kernel/arch/powerpc/platforms/cell/pervasive.c b/kernel/arch/powerpc/platforms/cell/pervasive.c new file mode 100644 index 000000000..d17e98bc0 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/pervasive.c @@ -0,0 +1,133 @@ +/* + * CBE Pervasive Monitor and Debug + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * Michael N. Day (mnday@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/kallsyms.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/pgtable.h> +#include <asm/reg.h> +#include <asm/cell-regs.h> + +#include "pervasive.h" + +static void cbe_power_save(void) +{ + unsigned long ctrl, thread_switch_control; + + /* Ensure our interrupt state is properly tracked */ + if (!prep_irq_for_idle()) + return; + + ctrl = mfspr(SPRN_CTRLF); + + /* Enable DEC and EE interrupt request */ + thread_switch_control = mfspr(SPRN_TSC_CELL); + thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; + + switch (ctrl & CTRL_CT) { + case CTRL_CT0: + thread_switch_control |= TSC_CELL_DEC_ENABLE_0; + break; + case CTRL_CT1: + thread_switch_control |= TSC_CELL_DEC_ENABLE_1; + break; + default: + printk(KERN_WARNING "%s: unknown configuration\n", + __func__); + break; + } + mtspr(SPRN_TSC_CELL, thread_switch_control); + + /* + * go into low thread priority, medium priority will be + * restored for us after wake-up. + */ + HMT_low(); + + /* + * atomically disable thread execution and runlatch. + * External and Decrementer exceptions are still handled when the + * thread is disabled but now enter in cbe_system_reset_exception() + */ + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); + + /* Re-enable interrupts in MSR */ + __hard_irq_enable(); +} + +static int cbe_system_reset_exception(struct pt_regs *regs) +{ + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEEE: + do_IRQ(regs); + break; + case SRR1_WAKEDEC: + timer_interrupt(regs); + break; + case SRR1_WAKEMT: + return cbe_sysreset_hack(); +#ifdef CONFIG_CBE_RAS + case SRR1_WAKESYSERR: + cbe_system_error_exception(regs); + break; + case SRR1_WAKETHERM: + cbe_thermal_exception(regs); + break; +#endif /* CONFIG_CBE_RAS */ + default: + /* do system reset */ + return 0; + } + /* everything handled */ + return 1; +} + +void __init cbe_pervasive_init(void) +{ + int cpu; + + if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) + return; + + for_each_possible_cpu(cpu) { + struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu); + if (!regs) + continue; + + /* Enable Pause(0) control bit */ + out_be64(®s->pmcr, in_be64(®s->pmcr) | + CBE_PMD_PAUSE_ZERO_CONTROL); + } + + ppc_md.power_save = cbe_power_save; + ppc_md.system_reset_exception = cbe_system_reset_exception; +} diff --git a/kernel/arch/powerpc/platforms/cell/pervasive.h b/kernel/arch/powerpc/platforms/cell/pervasive.h new file mode 100644 index 000000000..fd4d7b709 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/pervasive.h @@ -0,0 +1,42 @@ +/* + * Cell Pervasive Monitor and Debug interface and HW structures + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * David J. Erb (djerb@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef PERVASIVE_H +#define PERVASIVE_H + +extern void cbe_pervasive_init(void); +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +extern int cbe_sysreset_hack(void); +#else +static inline int cbe_sysreset_hack(void) +{ + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +#endif diff --git a/kernel/arch/powerpc/platforms/cell/pmu.c b/kernel/arch/powerpc/platforms/cell/pmu.c new file mode 100644 index 000000000..348a27b12 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/pmu.c @@ -0,0 +1,424 @@ +/* + * Cell Broadband Engine Performance Monitor + * + * (C) Copyright IBM Corporation 2001,2006 + * + * Author: + * David Erb (djerb@us.ibm.com) + * Kevin Corry (kevcorry@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <linux/export.h> +#include <asm/io.h> +#include <asm/irq_regs.h> +#include <asm/machdep.h> +#include <asm/pmc.h> +#include <asm/reg.h> +#include <asm/spu.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +/* + * When writing to write-only mmio addresses, save a shadow copy. All of the + * registers are 32-bit, but stored in the upper-half of a 64-bit field in + * pmd_regs. + */ + +#define WRITE_WO_MMIO(reg, x) \ + do { \ + u32 _x = (x); \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \ + shadow_regs->reg = _x; \ + } while (0) + +#define READ_SHADOW_REG(val, reg) \ + do { \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + (val) = shadow_regs->reg; \ + } while (0) + +#define READ_MMIO_UPPER32(val, reg) \ + do { \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \ + } while (0) + +/* + * Physical counter registers. + * Each physical counter can act as one 32-bit counter or two 16-bit counters. + */ + +u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr) +{ + u32 val_in_latch, val = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + READ_SHADOW_REG(val_in_latch, counter_value_in_latch); + + /* Read the latch or the actual counter, whichever is newer. */ + if (val_in_latch & (1 << phys_ctr)) { + READ_SHADOW_REG(val, pm_ctr[phys_ctr]); + } else { + READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]); + } + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_phys_ctr); + +void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + /* Writing to a counter only writes to a hardware latch. + * The new value is not propagated to the actual counter + * until the performance monitor is enabled. + */ + WRITE_WO_MMIO(pm_ctr[phys_ctr], val); + + pm_ctrl = cbe_read_pm(cpu, pm_control); + if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) { + /* The counters are already active, so we need to + * rewrite the pm_control register to "re-enable" + * the PMU. + */ + cbe_write_pm(cpu, pm_control, pm_ctrl); + } else { + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch |= (1 << phys_ctr); + } + } +} +EXPORT_SYMBOL_GPL(cbe_write_phys_ctr); + +/* + * "Logical" counter registers. + * These will read/write 16-bits or 32-bits depending on the + * current size of the counter. Counters 4 - 7 are always 16-bit. + */ + +u32 cbe_read_ctr(u32 cpu, u32 ctr) +{ + u32 val; + u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) + val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff); + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_ctr); + +void cbe_write_ctr(u32 cpu, u32 ctr, u32 val) +{ + u32 phys_ctr; + u32 phys_val; + + phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) { + phys_val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (ctr < NR_PHYS_CTRS) + val = (val << 16) | (phys_val & 0xffff); + else + val = (val & 0xffff) | (phys_val & 0xffff0000); + } + + cbe_write_phys_ctr(cpu, phys_ctr, val); +} +EXPORT_SYMBOL_GPL(cbe_write_ctr); + +/* + * Counter-control registers. + * Each "logical" counter has a corresponding control register. + */ + +u32 cbe_read_pm07_control(u32 cpu, u32 ctr) +{ + u32 pm07_control = 0; + + if (ctr < NR_CTRS) + READ_SHADOW_REG(pm07_control, pm07_control[ctr]); + + return pm07_control; +} +EXPORT_SYMBOL_GPL(cbe_read_pm07_control); + +void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val) +{ + if (ctr < NR_CTRS) + WRITE_WO_MMIO(pm07_control[ctr], val); +} +EXPORT_SYMBOL_GPL(cbe_write_pm07_control); + +/* + * Other PMU control registers. Most of these are write-only. + */ + +u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) +{ + u32 val = 0; + + switch (reg) { + case group_control: + READ_SHADOW_REG(val, group_control); + break; + + case debug_bus_control: + READ_SHADOW_REG(val, debug_bus_control); + break; + + case trace_address: + READ_MMIO_UPPER32(val, trace_address); + break; + + case ext_tr_timer: + READ_SHADOW_REG(val, ext_tr_timer); + break; + + case pm_status: + READ_MMIO_UPPER32(val, pm_status); + break; + + case pm_control: + READ_SHADOW_REG(val, pm_control); + break; + + case pm_interval: + READ_MMIO_UPPER32(val, pm_interval); + break; + + case pm_start_stop: + READ_SHADOW_REG(val, pm_start_stop); + break; + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_pm); + +void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val) +{ + switch (reg) { + case group_control: + WRITE_WO_MMIO(group_control, val); + break; + + case debug_bus_control: + WRITE_WO_MMIO(debug_bus_control, val); + break; + + case trace_address: + WRITE_WO_MMIO(trace_address, val); + break; + + case ext_tr_timer: + WRITE_WO_MMIO(ext_tr_timer, val); + break; + + case pm_status: + WRITE_WO_MMIO(pm_status, val); + break; + + case pm_control: + WRITE_WO_MMIO(pm_control, val); + break; + + case pm_interval: + WRITE_WO_MMIO(pm_interval, val); + break; + + case pm_start_stop: + WRITE_WO_MMIO(pm_start_stop, val); + break; + } +} +EXPORT_SYMBOL_GPL(cbe_write_pm); + +/* + * Get/set the size of a physical counter to either 16 or 32 bits. + */ + +u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr) +{ + u32 pm_ctrl, size = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32; + } + + return size; +} +EXPORT_SYMBOL_GPL(cbe_get_ctr_size); + +void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size) +{ + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + switch (ctr_size) { + case 16: + pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr); + break; + + case 32: + pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr); + break; + } + cbe_write_pm(cpu, pm_control, pm_ctrl); + } +} +EXPORT_SYMBOL_GPL(cbe_set_ctr_size); + +/* + * Enable/disable the entire performance monitoring unit. + * When we enable the PMU, all pending writes to counters get committed. + */ + +void cbe_enable_pm(u32 cpu) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch = 0; + + pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm); + +void cbe_disable_pm(u32 cpu) +{ + u32 pm_ctrl; + pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm); + +/* + * Reading from the trace_buffer. + * The trace buffer is two 64-bit registers. Reading from + * the second half automatically increments the trace_address. + */ + +void cbe_read_trace_buffer(u32 cpu, u64 *buf) +{ + struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + *buf++ = in_be64(&pmd_regs->trace_buffer_0_63); + *buf++ = in_be64(&pmd_regs->trace_buffer_64_127); +} +EXPORT_SYMBOL_GPL(cbe_read_trace_buffer); + +/* + * Enabling/disabling interrupts for the entire performance monitoring unit. + */ + +u32 cbe_get_and_clear_pm_interrupts(u32 cpu) +{ + /* Reading pm_status clears the interrupt bits. */ + return cbe_read_pm(cpu, pm_status); +} +EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts); + +void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) +{ + /* Set which node and thread will handle the next interrupt. */ + iic_set_interrupt_routing(cpu, thread, 0); + + /* Enable the interrupt bits in the pm_status register. */ + if (mask) + cbe_write_pm(cpu, pm_status, mask); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts); + +void cbe_disable_pm_interrupts(u32 cpu) +{ + cbe_get_and_clear_pm_interrupts(cpu); + cbe_write_pm(cpu, pm_status, 0); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); + +static irqreturn_t cbe_pm_irq(int irq, void *dev_id) +{ + perf_irq(get_irq_regs()); + return IRQ_HANDLED; +} + +static int __init cbe_init_pm_irq(void) +{ + unsigned int irq; + int rc, node; + + for_each_online_node(node) { + irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | + (node << IIC_IRQ_NODE_SHIFT)); + if (irq == NO_IRQ) { + printk("ERROR: Unable to allocate irq for node %d\n", + node); + return -EINVAL; + } + + rc = request_irq(irq, cbe_pm_irq, + 0, "cbe-pmu-0", NULL); + if (rc) { + printk("ERROR: Request for irq on node %d failed\n", + node); + return rc; + } + } + + return 0; +} +machine_arch_initcall(cell, cbe_init_pm_irq); + +void cbe_sync_irq(int node) +{ + unsigned int irq; + + irq = irq_find_mapping(NULL, + IIC_IRQ_IOEX_PMI + | (node << IIC_IRQ_NODE_SHIFT)); + + if (irq == NO_IRQ) { + printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ + "for node %d\n", irq, node); + return; + } + + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(cbe_sync_irq); + diff --git a/kernel/arch/powerpc/platforms/cell/qpace_setup.c b/kernel/arch/powerpc/platforms/cell/qpace_setup.c new file mode 100644 index 000000000..d328140dc --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/qpace_setup.c @@ -0,0 +1,148 @@ +/* + * linux/arch/powerpc/platforms/cell/qpace_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/console.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/kexec.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +static void qpace_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void qpace_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static const struct of_device_id qpace_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init qpace_publish_devices(void) +{ + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, qpace_bus_ids, NULL); + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(qpace, qpace_publish_devices); + +static void __init qpace_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + cbe_pervasive_init(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +static int __init qpace_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "IBM,QPACE")) + return 0; + + hpte_init_native(); + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(qpace) { + .name = "QPACE", + .probe = qpace_probe, + .setup_arch = qpace_setup_arch, + .show_cpuinfo = qpace_show_cpuinfo, + .restart = rtas_restart, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = qpace_progress, + .init_IRQ = iic_init_IRQ, +}; diff --git a/kernel/arch/powerpc/platforms/cell/ras.c b/kernel/arch/powerpc/platforms/cell/ras.c new file mode 100644 index 000000000..e865d7481 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/ras.c @@ -0,0 +1,356 @@ +/* + * Copyright 2006-2008, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> + +#include <asm/kexec.h> +#include <asm/reg.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/cell-regs.h> + +#include "ras.h" + + +static void dump_fir(int cpu) +{ + struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu); + struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu); + + if (pregs == NULL) + return; + + /* Todo: do some nicer parsing of bits and based on them go down + * to other sub-units FIRs and not only IIC + */ + printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n", + in_be64(&pregs->spec_att_mchk_fir)); + + if (iregs == NULL) + return; + printk(KERN_ERR "IOC FIR : 0x%016llx\n", + in_be64(&iregs->ioc_fir)); + +} + +void cbe_system_error_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + dump_stack(); +} + +void cbe_maintenance_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the maintenance interrupt at this point + */ + + printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +void cbe_thermal_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the thermal interrupt at this point + */ + + printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +static int cbe_machine_check_handler(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + + /* No recovery from this code now, lets continue */ + return 0; +} + +struct ptcal_area { + struct list_head list; + int nid; + int order; + struct page *pages; +}; + +static LIST_HEAD(ptcal_list); + +static int ptcal_start_tok, ptcal_stop_tok; + +static int __init cbe_ptcal_enable_on_node(int nid, int order) +{ + struct ptcal_area *area; + int ret = -ENOMEM; + unsigned long addr; + + if (is_kdump_kernel()) + rtas_call(ptcal_stop_tok, 1, 1, NULL, nid); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + goto out_err; + + area->nid = nid; + area->order = order; + area->pages = alloc_pages_exact_node(area->nid, + GFP_KERNEL|__GFP_THISNODE, + area->order); + + if (!area->pages) { + printk(KERN_WARNING "%s: no page on node %d\n", + __func__, area->nid); + goto out_free_area; + } + + /* + * We move the ptcal area to the middle of the allocated + * page, in order to avoid prefetches in memcpy and similar + * functions stepping on it. + */ + addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1); + printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n", + __func__, area->nid, addr); + + ret = -EIO; + if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid, + (unsigned int)(addr >> 32), + (unsigned int)(addr & 0xffffffff))) { + printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n", + __func__, nid); + goto out_free_pages; + } + + list_add(&area->list, &ptcal_list); + + return 0; + +out_free_pages: + __free_pages(area->pages, area->order); +out_free_area: + kfree(area); +out_err: + return ret; +} + +static int __init cbe_ptcal_enable(void) +{ + const u32 *size; + struct device_node *np; + int order, found_mic = 0; + + np = of_find_node_by_path("/rtas"); + if (!np) + return -ENODEV; + + size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); + if (!size) { + of_node_put(np); + return -ENODEV; + } + + pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); + order = get_order(*size); + of_node_put(np); + + /* support for malta device trees, with be@/mic@ nodes */ + for_each_node_by_type(np, "mic-tm") { + cbe_ptcal_enable_on_node(of_node_to_nid(np), order); + found_mic = 1; + } + + if (found_mic) + return 0; + + /* support for older device tree - use cpu nodes */ + for_each_node_by_type(np, "cpu") { + const u32 *nid = of_get_property(np, "node-id", NULL); + if (!nid) { + printk(KERN_ERR "%s: node %s is missing node-id?\n", + __func__, np->full_name); + continue; + } + cbe_ptcal_enable_on_node(*nid, order); + found_mic = 1; + } + + return found_mic ? 0 : -ENODEV; +} + +static int cbe_ptcal_disable(void) +{ + struct ptcal_area *area, *tmp; + int ret = 0; + + pr_debug("%s: disabling PTCAL\n", __func__); + + list_for_each_entry_safe(area, tmp, &ptcal_list, list) { + /* disable ptcal on this node */ + if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) { + printk(KERN_ERR "%s: error disabling PTCAL " + "on node %d!\n", __func__, + area->nid); + ret = -EIO; + continue; + } + + /* ensure we can access the PTCAL area */ + memset(page_address(area->pages), 0, + 1 << (area->order + PAGE_SHIFT)); + + /* clean up */ + list_del(&area->list); + __free_pages(area->pages, area->order); + kfree(area); + } + + return ret; +} + +static int cbe_ptcal_notify_reboot(struct notifier_block *nb, + unsigned long code, void *data) +{ + return cbe_ptcal_disable(); +} + +static void cbe_ptcal_crash_shutdown(void) +{ + cbe_ptcal_disable(); +} + +static struct notifier_block cbe_ptcal_reboot_notifier = { + .notifier_call = cbe_ptcal_notify_reboot +}; + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +static int sysreset_hack; + +static int __init cbe_sysreset_init(void) +{ + struct cbe_pmd_regs __iomem *regs; + + sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0"); + if (!sysreset_hack) + return 0; + + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + + /* Enable JTAG system-reset hack */ + out_be32(®s->fir_mode_reg, + in_be32(®s->fir_mode_reg) | + CBE_PMD_FIR_MODE_M8); + + return 0; +} +device_initcall(cbe_sysreset_init); + +int cbe_sysreset_hack(void) +{ + struct cbe_pmd_regs __iomem *regs; + + /* + * The BMC can inject user triggered system reset exceptions, + * but cannot set the system reset reason in srr1, + * so check an extra register here. + */ + if (sysreset_hack && (smp_processor_id() == 0)) { + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + if (in_be64(®s->ras_esc_0) & 0x0000ffff) { + out_be64(®s->ras_esc_0, 0); + return 0; + } + } + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +int __init cbe_ptcal_init(void) +{ + int ret; + ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); + ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal"); + + if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE + || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier); + if (ret) + goto out1; + + ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown); + if (ret) + goto out2; + + return cbe_ptcal_enable(); + +out2: + unregister_reboot_notifier(&cbe_ptcal_reboot_notifier); +out1: + printk(KERN_ERR "Can't disable PTCAL, so not enabling\n"); + return ret; +} + +arch_initcall(cbe_ptcal_init); + +void __init cbe_ras_init(void) +{ + unsigned long hid0; + + /* + * Enable System Error & thermal interrupts and wakeup conditions + */ + + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP | + HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP; + mtspr(SPRN_HID0, hid0); + mb(); + + /* + * Install machine check handler. Leave setting of precise mode to + * what the firmware did for now + */ + ppc_md.machine_check_exception = cbe_machine_check_handler; + mb(); + + /* + * For now, we assume that IOC_FIR is already set to forward some + * error conditions to the System Error handler. If that is not true + * then it will have to be fixed up here. + */ +} diff --git a/kernel/arch/powerpc/platforms/cell/ras.h b/kernel/arch/powerpc/platforms/cell/ras.h new file mode 100644 index 000000000..eb7ee54c8 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/ras.h @@ -0,0 +1,9 @@ +#ifndef RAS_H +#define RAS_H + +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); +extern void cbe_ras_init(void); + +#endif /* RAS_H */ diff --git a/kernel/arch/powerpc/platforms/cell/setup.c b/kernel/arch/powerpc/platforms/cell/setup.c new file mode 100644 index 000000000..36cff28d0 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/setup.c @@ -0,0 +1,286 @@ +/* + * linux/arch/powerpc/platforms/cell/cell_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#undef DEBUG + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/export.h> +#include <linux/unistd.h> +#include <linux/user.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/console.h> +#include <linux/mutex.h> +#include <linux/memory_hotplug.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/cputable.h> +#include <asm/ppc-pci.h> +#include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/cell-regs.h> +#include <asm/io-workarounds.h> + +#include "cell.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static void cell_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void cell_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) +{ + struct pci_controller *hose; + const char *s; + int i; + + if (!machine_is(cell)) + return; + + /* We're searching for a direct child of the PHB */ + if (dev->bus->self != NULL || dev->devfn != 0) + return; + + hose = pci_bus_to_host(dev->bus); + if (hose == NULL) + return; + + /* Only on PCIE */ + if (!of_device_is_compatible(hose->dn, "pciex")) + return; + + /* And only on axon */ + s = of_get_property(hose->dn, "model", NULL); + if (!s || strcmp(s, "Axon") != 0) + return; + + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + } + + printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n", + pci_name(dev)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); + +static int cell_setup_phb(struct pci_controller *phb) +{ + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + phb->controller_ops = cell_pci_controller_ops; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || strcmp(np->name, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + return 0; +} + +static const struct of_device_id cell_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init cell_publish_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, cell_bus_ids, NULL); + + /* On spider based blades, we need to manually create the OF + * platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (np->type == NULL || (strcmp(np->type, "pci") != 0 && + strcmp(np->type, "pciex") != 0)) + continue; + of_platform_device_create(np, NULL, NULL); + } + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(cell, cell_publish_devices); + +static void __init mpic_init_IRQ(void) +{ + struct device_node *dn; + struct mpic *mpic; + + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) + continue; + + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET, + 0, 0, " MPIC "); + if (mpic == NULL) + continue; + mpic_init(mpic); + } +} + + +static void __init cell_init_irq(void) +{ + iic_init_IRQ(); + spider_init_IRQ(); + mpic_init_IRQ(); +} + +static void __init cell_set_dabrx(void) +{ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static void __init cell_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + + cell_set_dabrx(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + + cbe_pervasive_init(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + mmio_nvram_init(); +} + +static int __init cell_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "IBM,CBEA") && + !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + return 0; + + hpte_init_native(); + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(cell) { + .name = "Cell", + .probe = cell_probe, + .setup_arch = cell_setup_arch, + .show_cpuinfo = cell_show_cpuinfo, + .restart = rtas_restart, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = cell_progress, + .init_IRQ = cell_init_irq, + .pci_setup_phb = cell_setup_phb, +}; + +struct pci_controller_ops cell_pci_controller_ops; diff --git a/kernel/arch/powerpc/platforms/cell/smp.c b/kernel/arch/powerpc/platforms/cell/smp.c new file mode 100644 index 000000000..895560f4b --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/smp.c @@ -0,0 +1,168 @@ +/* + * SMP support for BPA machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/device.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <linux/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/paca.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/cputhreads.h> +#include <asm/code-patching.h> + +#include "interrupt.h" +#include <asm/udbg.h> + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. + */ +static cpumask_t of_spin_map; + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpumask_test_cpu(lcpu, &of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Fixup atomic count: it exited inside IRQ handler. */ + task_thread_info(paca[lcpu].__current)->preempt_count = 0; + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_token("start-cpu"); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +static void smp_cell_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + iic_setup_cpu(); + + /* + * change default DABRX to allow user watchpoints + */ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static int smp_cell_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + if (!smp_startup_cpu(nr)) + return -ENOENT; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + + return 0; +} + +static struct smp_ops_t bpa_iic_smp_ops = { + .message_pass = iic_message_pass, + .probe = iic_request_IPIs, + .kick_cpu = smp_cell_kick_cpu, + .setup_cpu = smp_cell_setup_cpu, + .cpu_bootable = smp_generic_cpu_bootable, +}; + +/* This is called very early */ +void __init smp_init_cell(void) +{ + int i; + + DBG(" -> smp_init_cell()\n"); + + smp_ops = &bpa_iic_smp_ops; + + /* Mark threads which are still spinning in hold loops. */ + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, &of_spin_map); + } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, &of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; + } + + DBG(" <- smp_init_cell()\n"); +} diff --git a/kernel/arch/powerpc/platforms/cell/spider-pci.c b/kernel/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 000000000..f1f787889 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,184 @@ +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/io.h> + +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> +#include <asm/io-workarounds.h> + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + u32 val; + + priv = bus->private; + val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by + * kmalloc() to allocate dummy_page_va. + * In order to imporve the performance, the XDR which is used to + * allocate dummy_page_va is the nearest the spider-pci. + * We have to select the CBE which is the nearest the spider-pci + * to allocate memory from the best XDR, but I don't know that + * how to do. + * + * Celleb does not have this problem, because it has only one XDR. + */ + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n"); + return -1; + } + + dummy_page_da = dma_map_single(phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(phb->parent, dummy_page_da)) { + pr_err("SPIDER-IOWA:Map dummy page filed.\n"); + kfree(dummy_page_va); + return -1; + } + + out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da); + + return 0; +} + +int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) +{ + void __iomem *regs = NULL; + struct spiderpci_iowa_private *priv; + struct device_node *np = bus->phb->dn; + struct resource r; + unsigned long offset = (unsigned long)data; + + pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n", + np->full_name); + + priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + if (!priv) { + pr_err("SPIDERPCI-IOWA:" + "Can't allocate struct spiderpci_iowa_private"); + return -1; + } + + if (of_address_to_resource(np, 0, &r)) { + pr_err("SPIDERPCI-IOWA:Can't get resource.\n"); + goto error; + } + + regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE); + if (!regs) { + pr_err("SPIDERPCI-IOWA:ioremap failed.\n"); + goto error; + } + priv->regs = regs; + bus->private = priv; + + if (spiderpci_pci_setup_chip(bus->phb, regs)) + goto error; + + return 0; + +error: + kfree(priv); + bus->private = NULL; + + if (regs) + iounmap(regs); + + return -1; +} + +struct ppc_pci_io spiderpci_ops = { + .readb = spiderpci_readb, + .readw = spiderpci_readw, + .readl = spiderpci_readl, + .readq = spiderpci_readq, + .readw_be = spiderpci_readw_be, + .readl_be = spiderpci_readl_be, + .readq_be = spiderpci_readq_be, + .readsb = spiderpci_readsb, + .readsw = spiderpci_readsw, + .readsl = spiderpci_readsl, + .memcpy_fromio = spiderpci_memcpy_fromio, +}; + diff --git a/kernel/arch/powerpc/platforms/cell/spider-pic.c b/kernel/arch/powerpc/platforms/cell/spider-pic.c new file mode 100644 index 000000000..1f72f4ab6 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spider-pic.c @@ -0,0 +1,359 @@ +/* + * External Interrupt Controller on Spider South Bridge + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/ioport.h> + +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/io.h> + +#include "interrupt.h" + +/* register layout taken from Spider spec, table 7.4-4 */ +enum { + TIR_DEN = 0x004, /* Detection Enable Register */ + TIR_MSK = 0x084, /* Mask Level Register */ + TIR_EDC = 0x0c0, /* Edge Detection Clear Register */ + TIR_PNDA = 0x100, /* Pending Register A */ + TIR_PNDB = 0x104, /* Pending Register B */ + TIR_CS = 0x144, /* Current Status Register */ + TIR_LCSA = 0x150, /* Level Current Status Register A */ + TIR_LCSB = 0x154, /* Level Current Status Register B */ + TIR_LCSC = 0x158, /* Level Current Status Register C */ + TIR_LCSD = 0x15c, /* Level Current Status Register D */ + TIR_CFGA = 0x200, /* Setting Register A0 */ + TIR_CFGB = 0x204, /* Setting Register B0 */ + /* 0x208 ... 0x3ff Setting Register An/Bn */ + TIR_PPNDA = 0x400, /* Packet Pending Register A */ + TIR_PPNDB = 0x404, /* Packet Pending Register B */ + TIR_PIERA = 0x408, /* Packet Output Error Register A */ + TIR_PIERB = 0x40c, /* Packet Output Error Register B */ + TIR_PIEN = 0x444, /* Packet Output Enable Register */ + TIR_PIPND = 0x454, /* Packet Output Pending Register */ + TIRDID = 0x484, /* Spider Device ID Register */ + REISTIM = 0x500, /* Reissue Command Timeout Time Setting */ + REISTIMEN = 0x504, /* Reissue Command Timeout Setting */ + REISWAITEN = 0x508, /* Reissue Wait Control*/ +}; + +#define SPIDER_CHIP_COUNT 4 +#define SPIDER_SRC_COUNT 64 +#define SPIDER_IRQ_INVALID 63 + +struct spider_pic { + struct irq_domain *host; + void __iomem *regs; + unsigned int node_id; +}; +static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; + +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) +{ + return irq_data_get_irq_chip_data(d); +} + +static void __iomem *spider_get_irq_config(struct spider_pic *pic, + unsigned int src) +{ + return pic->regs + TIR_CFGA + 8 * src; +} + +static void spider_unmask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) | 0x30000000u); +} + +static void spider_mask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) & ~0x30000000u); +} + +static void spider_ack_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); + + /* Reset edge detection logic if necessary + */ + if (irqd_is_level_type(d)) + return; + + /* Only interrupts 47 to 50 can be set to edge */ + if (src < 47 || src > 50) + return; + + /* Perform the clear of the edge logic */ + out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf)); +} + +static int spider_set_irq_type(struct irq_data *d, unsigned int type) +{ + unsigned int sense = type & IRQ_TYPE_SENSE_MASK; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); + void __iomem *cfg = spider_get_irq_config(pic, hw); + u32 old_mask; + u32 ic; + + /* Note that only level high is supported for most interrupts */ + if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH && + (hw < 47 || hw > 50)) + return -EINVAL; + + /* Decode sense type */ + switch(sense) { + case IRQ_TYPE_EDGE_RISING: + ic = 0x3; + break; + case IRQ_TYPE_EDGE_FALLING: + ic = 0x2; + break; + case IRQ_TYPE_LEVEL_LOW: + ic = 0x0; + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_NONE: + ic = 0x1; + break; + default: + return -EINVAL; + } + + /* Configure the source. One gross hack that was there before and + * that I've kept around is the priority to the BE which I set to + * be the same as the interrupt source number. I don't know whether + * that's supposed to make any kind of sense however, we'll have to + * decide that, but for now, I'm not changing the behaviour. + */ + old_mask = in_be32(cfg) & 0x30000000u; + out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) | + (pic->node_id << 4) | 0xe); + out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff)); + + return 0; +} + +static struct irq_chip spider_pic = { + .name = "SPIDER", + .irq_unmask = spider_unmask_irq, + .irq_mask = spider_mask_irq, + .irq_ack = spider_ack_irq, + .irq_set_type = spider_set_irq_type, +}; + +static int spider_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq); + + /* Set default irq type */ + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static int spider_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Spider interrupts have 2 cells, first is the interrupt source, + * second, well, I don't know for sure yet ... We mask the top bits + * because old device-trees encode a node number in there + */ + *out_hwirq = intspec[0] & 0x3f; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + return 0; +} + +static const struct irq_domain_ops spider_host_ops = { + .map = spider_host_map, + .xlate = spider_host_xlate, +}; + +static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct spider_pic *pic = irq_desc_get_handler_data(desc); + unsigned int cs, virq; + + cs = in_be32(pic->regs + TIR_CS) >> 24; + if (cs == SPIDER_IRQ_INVALID) + virq = NO_IRQ; + else + virq = irq_linear_revmap(pic->host, cs); + + if (virq != NO_IRQ) + generic_handle_irq(virq); + + chip->irq_eoi(&desc->irq_data); +} + +/* For hooking up the cascace we have a problem. Our device-tree is + * crap and we don't know on which BE iic interrupt we are hooked on at + * least not the "standard" way. We can reconstitute it based on two + * informations though: which BE node we are connected to and whether + * we are connected to IOIF0 or IOIF1. Right now, we really only care + * about the IBM cell blade and we know that its firmware gives us an + * interrupt-map property which is pretty strange. + */ +static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) +{ + unsigned int virq; + const u32 *imap, *tmp; + int imaplen, intsize, unit; + struct device_node *iic; + + /* First, we check whether we have a real "interrupts" in the device + * tree in case the device-tree is ever fixed + */ + virq = irq_of_parse_and_map(pic->host->of_node, 0); + if (virq) + return virq; + + /* Now do the horrible hacks */ + tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); + if (tmp == NULL) + return NO_IRQ; + intsize = *tmp; + imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen); + if (imap == NULL || imaplen < (intsize + 1)) + return NO_IRQ; + iic = of_find_node_by_phandle(imap[intsize]); + if (iic == NULL) + return NO_IRQ; + imap += intsize + 1; + tmp = of_get_property(iic, "#interrupt-cells", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + intsize = *tmp; + /* Assume unit is last entry of interrupt specifier */ + unit = imap[intsize - 1]; + /* Ok, we have a unit, now let's try to get the node */ + tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + /* ugly as hell but works for now */ + pic->node_id = (*tmp) >> 1; + of_node_put(iic); + + /* Ok, now let's get cracking. You may ask me why I just didn't match + * the iic host from the iic OF node, but that way I'm still compatible + * with really really old old firmwares for which we don't have a node + */ + /* Manufacture an IIC interrupt number of class 2 */ + virq = irq_create_mapping(NULL, + (pic->node_id << IIC_IRQ_NODE_SHIFT) | + (2 << IIC_IRQ_CLASS_SHIFT) | + unit); + if (virq == NO_IRQ) + printk(KERN_ERR "spider_pic: failed to map cascade !"); + return virq; +} + + +static void __init spider_init_one(struct device_node *of_node, int chip, + unsigned long addr) +{ + struct spider_pic *pic = &spider_pics[chip]; + int i, virq; + + /* Map registers */ + pic->regs = ioremap(addr, 0x1000); + if (pic->regs == NULL) + panic("spider_pic: can't map registers !"); + + /* Allocate a host */ + pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT, + &spider_host_ops, pic); + if (pic->host == NULL) + panic("spider_pic: can't allocate irq host !"); + + /* Go through all sources and disable them */ + for (i = 0; i < SPIDER_SRC_COUNT; i++) { + void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i; + out_be32(cfg, in_be32(cfg) & ~0x30000000u); + } + + /* do not mask any interrupts because of level */ + out_be32(pic->regs + TIR_MSK, 0x0); + + /* enable interrupt packets to be output */ + out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1); + + /* Hook up the cascade interrupt to the iic and nodeid */ + virq = spider_find_cascade_and_node(pic); + if (virq == NO_IRQ) + return; + irq_set_handler_data(virq, pic); + irq_set_chained_handler(virq, spider_irq_cascade); + + printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n", + pic->node_id, addr, of_node->full_name); + + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1); +} + +void __init spider_init_IRQ(void) +{ + struct resource r; + struct device_node *dn; + int chip = 0; + + /* XXX node numbers are totally bogus. We _hope_ we get the device + * nodes in the right order here but that's definitely not guaranteed, + * we need to get the node from the device tree instead. + * There is currently no proper property for it (but our whole + * device-tree is bogus anyway) so all we can do is pray or maybe test + * the address and deduce the node-id + */ + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { + if (of_address_to_resource(dn, 0, &r)) { + printk(KERN_WARNING "spider-pic: Failed\n"); + continue; + } + } else if (of_device_is_compatible(dn, "sti,platform-spider-pic") + && (chip < 2)) { + static long hard_coded_pics[] = + { 0x24000008000ul, 0x34000008000ul}; + r.start = hard_coded_pics[chip]; + } else + continue; + spider_init_one(dn, chip++, r.start); + } +} diff --git a/kernel/arch/powerpc/platforms/cell/spu_base.c b/kernel/arch/powerpc/platforms/cell/spu_base.c new file mode 100644 index 000000000..f7af74f83 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_base.c @@ -0,0 +1,811 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/linux_logo.h> +#include <linux/syscore_ops.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/xmon.h> +#include <asm/prom.h> +#include <asm/kexec.h> + +const struct spu_management_ops *spu_management_ops; +EXPORT_SYMBOL_GPL(spu_management_ops); + +const struct spu_priv1_ops *spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); + +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); + +/* + * The spufs fault-handling code needs to call force_sig_info to raise signals + * on DMA errors. Export it here to avoid general kernel-wide access to this + * function + */ +EXPORT_SYMBOL_GPL(force_sig_info); + +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition spu_full_list_lock protects all assignmens to + * spu->mm. + */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); + +void spu_invalidate_slbs(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + unsigned long flags; + + spin_lock_irqsave(&spu->register_lock, flags); + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) + out_be64(&priv2->slb_invalidate_all_W, 0UL); + spin_unlock_irqrestore(&spu->register_lock, flags); +} +EXPORT_SYMBOL_GPL(spu_invalidate_slbs); + +/* This is called by the MM core when a segment size is changed, to + * request a flush of all the SPEs using a given mm + */ +void spu_flush_all_slbs(struct mm_struct *mm) +{ + struct spu *spu; + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + list_for_each_entry(spu, &spu_full_list, full_list) { + if (spu->mm == mm) + spu_invalidate_slbs(spu); + } + spin_unlock_irqrestore(&spu_full_list_lock, flags); +} + +/* The hack below stinks... try to do something better one of + * these days... Does it even work properly with NR_CPUS == 1 ? + */ +static inline void mm_needs_global_tlbie(struct mm_struct *mm) +{ + int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; + + /* Global TLBIE broadcast required with SPEs. */ + bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr); +} + +void spu_associate_mm(struct spu *spu, struct mm_struct *mm) +{ + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + spu->mm = mm; + spin_unlock_irqrestore(&spu_full_list_lock, flags); + if (mm) + mm_needs_global_tlbie(mm); +} +EXPORT_SYMBOL_GPL(spu_associate_mm); + +int spu_64k_pages_available(void) +{ + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; +} +EXPORT_SYMBOL_GPL(spu_64k_pages_available); + +static void spu_restart_dma(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); + else { + set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + mb(); + } +} + +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n", + __func__, slbe, slb->vsid, slb->esid); + + out_be64(&priv2->slb_index_W, slbe); + /* set invalid before writing vsid */ + out_be64(&priv2->slb_esid_RW, 0); + /* now it's safe to write the vsid */ + out_be64(&priv2->slb_vsid_RW, slb->vsid); + /* setting the new esid makes the entry valid again */ + out_be64(&priv2->slb_esid_RW, slb->esid); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct copro_slb slb; + int ret; + + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; + + spu_load_slb(spu, spu->slb_replace, &slb); + + spu->slb_replace++; + if (spu->slb_replace >= 8) + spu->slb_replace = 0; + + spu_restart_dma(spu); + spu->stats.slb_flt++; + return 0; +} + +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX +static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) +{ + int ret; + + pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea); + + /* + * Handle kernel space hash faults immediately. User hash + * faults need to be deferred to process context. + */ + if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && + (REGION_ID(ea) != USER_REGION_ID)) { + + spin_unlock(&spu->register_lock); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); + spin_lock(&spu->register_lock); + + if (!ret) { + spu_restart_dma(spu); + return 0; + } + } + + spu->class_1_dar = ea; + spu->class_1_dsisr = dsisr; + + spu->stop_callback(spu, 1); + + spu->class_1_dar = 0; + spu->class_1_dsisr = 0; + + return 0; +} + +static void __spu_kernel_slb(void *addr, struct copro_slb *slb) +{ + unsigned long ea = (unsigned long)addr; + u64 llp; + + if (REGION_ID(ea) == KERNEL_REGION_ID) + llp = mmu_psize_defs[mmu_linear_psize].sllp; + else + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + + slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; +} + +/** + * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the + * address @new_addr is present. + */ +static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, + void *new_addr) +{ + unsigned long ea = (unsigned long)new_addr; + int i; + + for (i = 0; i < nr_slbs; i++) + if (!((slbs[i].esid ^ ea) & ESID_MASK)) + return 1; + + return 0; +} + +/** + * Setup the SPU kernel SLBs, in preparation for a context save/restore. We + * need to map both the context save area, and the save/restore code. + * + * Because the lscsa and code may cross segment boundaires, we check to see + * if mappings are required for the start and end of each range. We currently + * assume that the mappings are smaller that one segment - if not, something + * is seriously wrong. + */ +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size) +{ + struct copro_slb slbs[4]; + int i, nr_slbs = 0; + /* start and end addresses of both mappings */ + void *addrs[] = { + lscsa, (void *)lscsa + sizeof(*lscsa) - 1, + code, code + code_size - 1 + }; + + /* check the set of addresses, and create a new entry in the slbs array + * if there isn't already a SLB for that address */ + for (i = 0; i < ARRAY_SIZE(addrs); i++) { + if (__slb_present(slbs, nr_slbs, addrs[i])) + continue; + + __spu_kernel_slb(addrs[i], &slbs[nr_slbs]); + nr_slbs++; + } + + spin_lock_irq(&spu->register_lock); + /* Add the set of SLBs */ + for (i = 0; i < nr_slbs; i++) + spu_load_slb(spu, i, &slbs[i]); + spin_unlock_irq(&spu->register_lock); +} +EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs); + +static irqreturn_t +spu_irq_class_0(int irq, void *data) +{ + struct spu *spu; + unsigned long stat, mask; + + spu = data; + + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 0); + stat = spu_int_stat_get(spu, 0) & mask; + + spu->class_0_pending |= stat; + spu->class_0_dar = spu_mfc_dar_get(spu); + spu->stop_callback(spu, 0); + spu->class_0_pending = 0; + spu->class_0_dar = 0; + + spu_int_stat_clear(spu, 0, stat); + spin_unlock(&spu->register_lock); + + return IRQ_HANDLED; +} + +static irqreturn_t +spu_irq_class_1(int irq, void *data) +{ + struct spu *spu; + unsigned long stat, mask, dar, dsisr; + + spu = data; + + /* atomically read & clear class1 status. */ + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 1); + stat = spu_int_stat_get(spu, 1) & mask; + dar = spu_mfc_dar_get(spu); + dsisr = spu_mfc_dsisr_get(spu); + if (stat & CLASS1_STORAGE_FAULT_INTR) + spu_mfc_dsisr_set(spu, 0ul); + spu_int_stat_clear(spu, 1, stat); + + pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat, + dar, dsisr); + + if (stat & CLASS1_SEGMENT_FAULT_INTR) + __spu_trap_data_seg(spu, dar); + + if (stat & CLASS1_STORAGE_FAULT_INTR) + __spu_trap_data_map(spu, dar, dsisr); + + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR) + ; + + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR) + ; + + spu->class_1_dsisr = 0; + spu->class_1_dar = 0; + + spin_unlock(&spu->register_lock); + + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t +spu_irq_class_2(int irq, void *data) +{ + struct spu *spu; + unsigned long stat; + unsigned long mask; + const int mailbox_intrs = + CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR; + + spu = data; + spin_lock(&spu->register_lock); + stat = spu_int_stat_get(spu, 2); + mask = spu_int_mask_get(spu, 2); + /* ignore interrupts we're not waiting for */ + stat &= mask; + /* mailbox interrupts are level triggered. mask them now before + * acknowledging */ + if (stat & mailbox_intrs) + spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs)); + /* acknowledge all interrupts before the callbacks */ + spu_int_stat_clear(spu, 2, stat); + + pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); + + if (stat & CLASS2_MAILBOX_INTR) + spu->ibox_callback(spu); + + if (stat & CLASS2_SPU_STOP_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_HALT_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR) + spu->mfc_callback(spu); + + if (stat & CLASS2_MAILBOX_THRESHOLD_INTR) + spu->wbox_callback(spu); + + spu->stats.class2_intr++; + + spin_unlock(&spu->register_lock); + + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static int spu_request_irqs(struct spu *spu) +{ + int ret = 0; + + if (spu->irqs[0] != NO_IRQ) { + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", + spu->number); + ret = request_irq(spu->irqs[0], spu_irq_class_0, + 0, spu->irq_c0, spu); + if (ret) + goto bail0; + } + if (spu->irqs[1] != NO_IRQ) { + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", + spu->number); + ret = request_irq(spu->irqs[1], spu_irq_class_1, + 0, spu->irq_c1, spu); + if (ret) + goto bail1; + } + if (spu->irqs[2] != NO_IRQ) { + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", + spu->number); + ret = request_irq(spu->irqs[2], spu_irq_class_2, + 0, spu->irq_c2, spu); + if (ret) + goto bail2; + } + return 0; + +bail2: + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); +bail1: + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); +bail0: + return ret; +} + +static void spu_free_irqs(struct spu *spu) +{ + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); + if (spu->irqs[2] != NO_IRQ) + free_irq(spu->irqs[2], spu); +} + +void spu_init_channels(struct spu *spu) +{ + static const struct { + unsigned channel; + unsigned count; + } zero_list[] = { + { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, }, + { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, }, + }, count_list[] = { + { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, }, + { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, }, + { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, }, + }; + struct spu_priv2 __iomem *priv2; + int i; + + priv2 = spu->priv2; + + /* initialize all channel data to zero */ + for (i = 0; i < ARRAY_SIZE(zero_list); i++) { + int count; + + out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); + for (count = 0; count < zero_list[i].count; count++) + out_be64(&priv2->spu_chnldata_RW, 0); + } + + /* initialize channel counts to meaningful values */ + for (i = 0; i < ARRAY_SIZE(count_list); i++) { + out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel); + out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); + } +} +EXPORT_SYMBOL_GPL(spu_init_channels); + +static struct bus_type spu_subsys = { + .name = "spu", + .dev_name = "spu", +}; + +int spu_add_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_create_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr); + +int spu_add_dev_attr_group(struct attribute_group *attrs) +{ + struct spu *spu; + int rc = 0; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->dev.kobj, attrs); + + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", + __func__, attrs->name); + + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + break; + } + } + + mutex_unlock(&spu_full_list_mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr_group); + + +void spu_remove_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_remove_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr); + +void spu_remove_dev_attr_group(struct attribute_group *attrs) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); + +static int spu_create_dev(struct spu *spu) +{ + int ret; + + spu->dev.id = spu->number; + spu->dev.bus = &spu_subsys; + ret = device_register(&spu->dev); + if (ret) { + printk(KERN_ERR "Can't register SPU %d with sysfs\n", + spu->number); + return ret; + } + + sysfs_add_device_to_node(&spu->dev, spu->node); + + return 0; +} + +static int __init create_spu(void *data) +{ + struct spu *spu; + int ret; + static int number; + unsigned long flags; + + ret = -ENOMEM; + spu = kzalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) + goto out; + + spu->alloc_state = SPU_FREE; + + spin_lock_init(&spu->register_lock); + spin_lock(&spu_lock); + spu->number = number++; + spin_unlock(&spu_lock); + + ret = spu_create_spu(spu, data); + + if (ret) + goto out_free; + + spu_mfc_sdr_setup(spu); + spu_mfc_sr1_set(spu, 0x33); + ret = spu_request_irqs(spu); + if (ret) + goto out_destroy; + + ret = spu_create_dev(spu); + if (ret) + goto out_free_irqs; + + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); + + mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); + list_add(&spu->full_list, &spu_full_list); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); + + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + spu->stats.tstamp = ktime_get_ns(); + + INIT_LIST_HEAD(&spu->aff_list); + + goto out; + +out_free_irqs: + spu_free_irqs(spu); +out_destroy: + spu_destroy_spu(spu); +out_free: + kfree(spu); +out: + return ret; +} + +static const char *spu_state_names[] = { + "user", "system", "iowait", "idle" +}; + +static unsigned long long spu_acct_time(struct spu *spu, + enum spu_utilization_state state) +{ + unsigned long long time = spu->stats.times[state]; + + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. + */ + if (spu->stats.util_state == state) + time += ktime_get_ns() - spu->stats.tstamp; + + return time / NSEC_PER_MSEC; +} + + +static ssize_t spu_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct spu *spu = container_of(dev, struct spu, dev); + + return sprintf(buf, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + spu_state_names[spu->stats.util_state], + spu_acct_time(spu, SPU_UTIL_USER), + spu_acct_time(spu, SPU_UTIL_SYSTEM), + spu_acct_time(spu, SPU_UTIL_IOWAIT), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu->stats.vol_ctx_switch, + spu->stats.invol_ctx_switch, + spu->stats.slb_flt, + spu->stats.hash_flt, + spu->stats.min_flt, + spu->stats.maj_flt, + spu->stats.class2_intr, + spu->stats.libassist); +} + +static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); + +#ifdef CONFIG_KEXEC + +struct crash_spu_info { + struct spu *spu; + u32 saved_spu_runcntl_RW; + u32 saved_spu_status_R; + u32 saved_spu_npc_RW; + u64 saved_mfc_sr1_RW; + u64 saved_mfc_dar; + u64 saved_mfc_dsisr; +}; + +#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ +static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; + +static void crash_kexec_stop_spus(void) +{ + struct spu *spu; + int i; + u64 tmp; + + for (i = 0; i < CRASH_NUM_SPUS; i++) { + if (!crash_spu_info[i].spu) + continue; + + spu = crash_spu_info[i].spu; + + crash_spu_info[i].saved_spu_runcntl_RW = + in_be32(&spu->problem->spu_runcntl_RW); + crash_spu_info[i].saved_spu_status_R = + in_be32(&spu->problem->spu_status_R); + crash_spu_info[i].saved_spu_npc_RW = + in_be32(&spu->problem->spu_npc_RW); + + crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); + crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); + tmp = spu_mfc_sr1_get(spu); + crash_spu_info[i].saved_mfc_sr1_RW = tmp; + + tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, tmp); + + __delay(200); + } +} + +static void crash_register_spus(struct list_head *list) +{ + struct spu *spu; + int ret; + + list_for_each_entry(spu, list, full_list) { + if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + continue; + + crash_spu_info[spu->number].spu = spu; + } + + ret = crash_shutdown_register(&crash_kexec_stop_spus); + if (ret) + printk(KERN_ERR "Could not register SPU crash handler"); +} + +#else +static inline void crash_register_spus(struct list_head *list) +{ +} +#endif + +static void spu_shutdown(void) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + spu_free_irqs(spu); + spu_destroy_spu(spu); + } + mutex_unlock(&spu_full_list_mutex); +} + +static struct syscore_ops spu_syscore_ops = { + .shutdown = spu_shutdown, +}; + +static int __init init_spu_base(void) +{ + int i, ret = 0; + + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); + } + + if (!spu_management_ops) + goto out; + + /* create system subsystem for spus */ + ret = subsys_system_register(&spu_subsys, NULL); + if (ret) + goto out; + + ret = spu_enumerate_spus(create_spu); + + if (ret < 0) { + printk(KERN_WARNING "%s: Error initializing spus\n", + __func__); + goto out_unregister_subsys; + } + + if (ret > 0) + fb_append_extra_logo(&logo_spe_clut224, ret); + + mutex_lock(&spu_full_list_mutex); + xmon_register_spus(&spu_full_list); + crash_register_spus(&spu_full_list); + mutex_unlock(&spu_full_list_mutex); + spu_add_dev_attr(&dev_attr_stat); + register_syscore_ops(&spu_syscore_ops); + + spu_init_affinity(); + + return 0; + + out_unregister_subsys: + bus_unregister(&spu_subsys); + out: + return ret; +} +module_init(init_spu_base); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); diff --git a/kernel/arch/powerpc/platforms/cell/spu_callbacks.c b/kernel/arch/powerpc/platforms/cell/spu_callbacks.c new file mode 100644 index 000000000..a494028b2 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_callbacks.c @@ -0,0 +1,74 @@ +/* + * System call callback functions for SPUs + */ + +#undef DEBUG + +#include <linux/kallsyms.h> +#include <linux/export.h> +#include <linux/syscalls.h> + +#include <asm/spu.h> +#include <asm/syscalls.h> +#include <asm/unistd.h> + +/* + * This table defines the system calls that an SPU can call. + * It is currently a subset of the 64 bit powerpc system calls, + * with the exact semantics. + * + * The reasons for disabling some of the system calls are: + * 1. They interact with the way SPU syscalls are handled + * and we can't let them execute ever: + * restart_syscall, exit, for, execve, ptrace, ... + * 2. They are deprecated and replaced by other means: + * uselib, pciconfig_*, sysfs, ... + * 3. They are somewhat interacting with the system in a way + * we don't want an SPU to: + * reboot, init_module, mount, kexec_load + * 4. They are optional and we can't rely on them being + * linked into the kernel. Unfortunately, the cond_syscall + * helper does not work here as it does not add the necessary + * opd symbols: + * mbind, mq_open, ipc, ... + */ + +static void *spu_syscall_table[] = { +#define SYSCALL(func) sys_ni_syscall, +#define COMPAT_SYS(func) sys_ni_syscall, +#define PPC_SYS(func) sys_ni_syscall, +#define OLDSYS(func) sys_ni_syscall, +#define SYS32ONLY(func) sys_ni_syscall, +#define PPC64ONLY(func) sys_ni_syscall, +#define SYSX(f, f3264, f32) sys_ni_syscall, + +#define SYSCALL_SPU(func) sys_##func, +#define COMPAT_SYS_SPU(func) sys_##func, +#define PPC_SYS_SPU(func) ppc_##func, +#define SYSX_SPU(f, f3264, f32) f, + +#include <asm/systbl.h> +}; + +long spu_sys_callback(struct spu_syscall_block *s) +{ + long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6); + + if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) { + pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret); + return -ENOSYS; + } + + syscall = spu_syscall_table[s->nr_ret]; + + pr_debug("SPU-syscall " + "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n", + syscall, + s->nr_ret, + s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); + + return syscall(s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); +} +EXPORT_SYMBOL_GPL(spu_sys_callback); diff --git a/kernel/arch/powerpc/platforms/cell/spu_manage.c b/kernel/arch/powerpc/platforms/cell/spu_manage.c new file mode 100644 index 000000000..c3327f3d8 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_manage.c @@ -0,0 +1,555 @@ +/* + * spu management operations for of based platforms + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * (C) Copyright 2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/export.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "spufs/spufs.h" +#include "interrupt.h" + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static u64 __init find_spu_unit_number(struct device_node *spe) +{ + const unsigned int *prop; + int proplen; + + /* new device trees should provide the physical-id attribute */ + prop = of_get_property(spe, "physical-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* celleb device tree provides the unit-id */ + prop = of_get_property(spe, "unit-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* legacy device trees provide the id in the reg attribute */ + prop = of_get_property(spe, "reg", &proplen); + if (proplen == 4) + return (u64)*prop; + + return 0; +} + +static void spu_unmap(struct spu *spu) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + iounmap(spu->priv1); + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + int nid; + + /* Get the interrupt source unit from the device-tree */ + tmp = of_get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + tmp = of_get_property(np->parent->parent, "node-id", NULL); + if (!tmp) { + printk(KERN_WARNING "%s: can't find node-id\n", __func__); + nid = spu->node; + } else + nid = tmp[0]; + + /* Add the node number */ + isrc |= nid << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? -EINVAL : 0; +} + +static void __iomem * __init spu_map_prop_old(struct spu *spu, + struct device_node *n, + const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + int proplen; + + prop = of_get_property(n, name, &proplen); + if (prop == NULL || proplen != sizeof (struct address_prop)) + return NULL; + + return ioremap(prop->address, prop->len); +} + +static int __init spu_map_device_old(struct spu *spu) +{ + struct device_node *node = spu->devnode; + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = of_get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = of_get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + spu_map_prop_old(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = of_get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem = spu_map_prop_old(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv2 = spu_map_prop_old(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + spu->priv1 = spu_map_prop_old(spu, node, "priv1"); + if (!spu->priv1) + goto out_unmap; + } + + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct of_phandle_args oirq; + int ret; + int i; + + for (i=0; i < 3; i++) { + ret = of_irq_parse_one(np, i, &oirq); + if (ret) { + pr_debug("spu_new: failed to get irq %d\n", i); + goto err; + } + ret = -EINVAL; + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0], + oirq.np->full_name); + spu->irqs[i] = irq_create_of_mapping(&oirq); + if (spu->irqs[i] == NO_IRQ) { + pr_debug("spu_new: failed to map it !\n"); + goto err; + } + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", *oirq.args, + spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i] != NO_IRQ) + irq_dispose_mapping(spu->irqs[i]); + } + return ret; +} + +static int spu_map_resource(struct spu *spu, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct device_node *np = spu->devnode; + struct resource resource = { }; + unsigned long len; + int ret; + + ret = of_address_to_resource(np, nr, &resource); + if (ret) + return ret; + if (phys) + *phys = resource.start; + len = resource_size(&resource); + *virt = ioremap(resource.start, len); + if (!*virt) + return -EINVAL; + return 0; +} + +static int __init spu_map_device(struct spu *spu) +{ + struct device_node *np = spu->devnode; + int ret = -ENODEV; + + spu->name = of_get_property(np, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 0\n", + np->full_name); + goto out; + } + ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 1\n", + np->full_name); + goto out_unmap; + } + ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 2\n", + np->full_name); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = spu_map_resource(spu, 3, + (void __iomem**)&spu->priv1, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 3\n", + np->full_name); + goto out_unmap; + } + pr_debug("spu_new: %s maps:\n", np->full_name); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", spu->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + unsigned int n = 0; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __func__, node->name); + break; + } + n++; + } + return ret ? ret : n; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + static int legacy_map = 0, legacy_irq = 0; + + spu->devnode = of_node_get(spe); + spu->spe_id = find_spu_unit_number(spe); + + spu->node = of_node_to_nid(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %s on node %d ignored," + " node number too big\n", spe->full_name, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out; + } + + ret = spu_map_device(spu); + if (ret) { + if (!legacy_map) { + legacy_map = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying to map old style\n", __func__); + } + ret = spu_map_device_old(spu); + if (ret) { + printk(KERN_ERR "Unable to map %s\n", + spu->name); + goto out; + } + } + + ret = spu_map_interrupts(spu, spe); + if (ret) { + if (!legacy_irq) { + legacy_irq = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying old style irq\n", __func__); + } + ret = spu_map_interrupts_old(spu, spe); + if (ret) { + printk(KERN_ERR "%s: could not map interrupts\n", + spu->name); + goto out_unmap; + } + } + + pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu->devnode); + return 0; +} + +static void enable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_start(ctx); +} + +static void disable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_stop(ctx); +} + +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + const u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = of_get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + &last_spu->aff_list); + last_spu = spu; + } + } +} + +static int of_has_vicinity(void) +{ + struct device_node *dn; + + for_each_node_by_type(dn, "spe") { + if (of_find_property(dn, "vicinity", NULL)) { + of_node_put(dn); + return 1; + } + } + return 0; +} + +static struct spu *devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * +neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + struct device_node *spu_dn; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + spu_dn = spu_devnode(spu); + if (spu_dn == avoid) + continue; + vic_handles = of_get_property(spu_dn, "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->phandle) + return spu; + } + } + return NULL; +} + +static void init_affinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + const char *name; + int lenp, i, added; + + last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu, + cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp); + + /* + * Walk through each phandle in vicinity property of the spu + * (tipically two vicinity phandles per spe node) + */ + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + /* a neighbour might be spe, mic-tm, or bif0 */ + name = of_get_property(vic_dn, "name", NULL); + if (!name) + continue; + + if (strcmp(name, "spe") == 0) { + spu = devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->phandle; + } else { + /* + * "mic-tm" and "bif0" nodes do not have + * vicinity property. So we need to find the + * spe which has vic_dn as neighbour, but + * skipping the one we came from (last_spu_dn) + */ + spu = neighbour_spu(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (!strcmp(name, "mic-tm")) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->phandle; + } + + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void init_affinity_fw(void) +{ + int cbe; + + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_affinity_node(cbe); +} + +static int __init init_affinity(void) +{ + if (of_has_vicinity()) { + init_affinity_fw(); + } else { + long root = of_get_flat_dt_root(); + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + init_affinity_qs20_harcoded(); + else + printk("No affinity configuration found\n"); + } + + return 0; +} + +const struct spu_management_ops spu_management_of_ops = { + .enumerate_spus = of_enumerate_spus, + .create_spu = of_create_spu, + .destroy_spu = of_destroy_spu, + .enable_spu = enable_spu_by_master_run, + .disable_spu = disable_spu_by_master_run, + .init_affinity = init_affinity, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spu_notify.c b/kernel/arch/powerpc/platforms/cell/spu_notify.c new file mode 100644 index 000000000..afdf857c3 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_notify.c @@ -0,0 +1,68 @@ +/* + * Move OProfile dependencies from spufs module to the kernel so it + * can run on non-cell PPC. + * + * Copyright (C) IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/notifier.h> +#include <asm/spu.h> +#include "spufs/spufs.h" + +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +{ + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); +} +EXPORT_SYMBOL_GPL(spu_switch_notify); + +int spu_switch_event_register(struct notifier_block *n) +{ + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; +} +EXPORT_SYMBOL_GPL(spu_switch_event_register); + +int spu_switch_event_unregister(struct notifier_block *n) +{ + return blocking_notifier_chain_unregister(&spu_switch_notifier, n); +} +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); + +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + void (* prof_info_release) (struct kref *kref)) +{ + ctx->prof_priv_kref = prof_info_kref; + ctx->prof_priv_release = prof_info_release; +} +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); + +void *spu_get_profile_private_kref(struct spu_context *ctx) +{ + return ctx->prof_priv_kref; +} +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); + diff --git a/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c new file mode 100644 index 000000000..66d33724f --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -0,0 +1,180 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> +#include <linux/sched.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "interrupt.h" +#include "spu_priv1_mmio.h" + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + out_be64(&spu->priv1->int_mask_RW[class], mask); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_mask_RW[class]); +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + out_be64(&spu->priv1->int_stat_RW[class], stat); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_stat_RW[class]); +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + u64 target; + u64 route; + + if (nr_cpus_node(spu->node)) { + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); + + if (!cpumask_intersects(spumask, cpumask)) + return; + } + + target = iic_get_target_id(cpu); + route = target << 48 | target << 32 | target << 16; + out_be64(&spu->priv1->int_route_RW, route); +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dar_RW); +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dsisr_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + out_be64(&spu->priv1->mfc_sr1_RW, sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_sr1_RW); +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_tclass_id_RW); +} + +static void tlb_invalidate(struct spu *spu) +{ + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_groupID_RW); +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_enable_RW); +} + +const struct spu_priv1_ops spu_priv1_mmio_ops = +{ + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h new file mode 100644 index 000000000..7b62bd1cc --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h @@ -0,0 +1,26 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef SPU_PRIV1_MMIO_H +#define SPU_PRIV1_MMIO_H + +struct device_node *spu_devnode(struct spu *spu); + +#endif /* SPU_PRIV1_MMIO_H */ diff --git a/kernel/arch/powerpc/platforms/cell/spu_syscalls.c b/kernel/arch/powerpc/platforms/cell/spu_syscalls.c new file mode 100644 index 000000000..5e6e0bad6 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spu_syscalls.c @@ -0,0 +1,178 @@ +/* + * SPU file system -- system call stubs + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright 2006-2007, IBM Corporation + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/syscalls.h> +#include <linux/rcupdate.h> +#include <linux/binfmts.h> + +#include <asm/spu.h> + +/* protected by rcu */ +static struct spufs_calls *spufs_calls; + +#ifdef CONFIG_SPU_FS_MODULE + +static inline struct spufs_calls *spufs_calls_get(void) +{ + struct spufs_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(spufs_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) +{ + BUG_ON(calls != spufs_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(spufs_calls->owner); +} + +#else /* !defined CONFIG_SPU_FS_MODULE */ + +static inline struct spufs_calls *spufs_calls_get(void) +{ + return spufs_calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) { } + +#endif /* CONFIG_SPU_FS_MODULE */ + +SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, + umode_t, mode, int, neighbor_fd) +{ + long ret; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); + ret = -EBADF; + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); + } + } else + ret = calls->create_thread(name, flags, mode, NULL); + + spufs_calls_put(calls); + return ret; +} + +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + long ret; + struct fd arg; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + ret = -EBADF; + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); + } + + spufs_calls_put(calls); + return ret; +} + +#ifdef CONFIG_COREDUMP +int elf_coredump_extra_notes_size(void) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_size(); + + spufs_calls_put(calls); + + return ret; +} + +int elf_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_write(cprm); + + spufs_calls_put(calls); + + return ret; +} +#endif + +void notify_spus_active(void) +{ + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return; + + calls->notify_spus_active(); + spufs_calls_put(calls); + + return; +} + +int register_spu_syscalls(struct spufs_calls *calls) +{ + if (spufs_calls) + return -EBUSY; + + rcu_assign_pointer(spufs_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_spu_syscalls); + +void unregister_spu_syscalls(struct spufs_calls *calls) +{ + BUG_ON(spufs_calls->owner != calls->owner); + RCU_INIT_POINTER(spufs_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/kernel/arch/powerpc/platforms/cell/spufs/.gitignore b/kernel/arch/powerpc/platforms/cell/spufs/.gitignore new file mode 100644 index 000000000..a09ee8d84 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/.gitignore @@ -0,0 +1,2 @@ +spu_save_dump.h +spu_restore_dump.h diff --git a/kernel/arch/powerpc/platforms/cell/spufs/Makefile b/kernel/arch/powerpc/platforms/cell/spufs/Makefile new file mode 100644 index 000000000..52a7d2596 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/Makefile @@ -0,0 +1,62 @@ + +obj-$(CONFIG_SPU_FS) += spufs.o +spufs-y += inode.o file.o context.o syscalls.o +spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o +spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o + +# magic for the trace events +CFLAGS_sched.o := -I$(src) + +# Rules to build switch.o with the help of SPU tool chain +SPU_CROSS := spu- +SPU_CC := $(SPU_CROSS)gcc +SPU_AS := $(SPU_CROSS)gcc +SPU_LD := $(SPU_CROSS)ld +SPU_OBJCOPY := $(SPU_CROSS)objcopy +SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__ +SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__ +SPU_LDFLAGS := -N -Ttext=0x0 + +$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h +clean-files := spu_save_dump.h spu_restore_dump.h + +# Compile SPU files + cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< +quiet_cmd_spu_cc = SPU_CC $@ +$(obj)/spu_%.o: $(src)/spu_%.c + $(call if_changed,spu_cc) + +# Assemble SPU files + cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $< +quiet_cmd_spu_as = SPU_AS $@ +$(obj)/spu_%.o: $(src)/spu_%.S + $(call if_changed,spu_as) + +# Link SPU Executables + cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^ +quiet_cmd_spu_ld = SPU_LD $@ +$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o + $(call if_changed,spu_ld) + +# Copy into binary format + cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@ +quiet_cmd_spu_objcopy = OBJCOPY $@ +$(obj)/spu_%.bin: $(src)/spu_% + $(call if_changed,spu_objcopy) + +# create C code from ELF executable +cmd_hexdump = ( \ + echo "/*" ; \ + echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \ + echo " * Hex-dump auto generated from $*.c." ; \ + echo " * Do not edit!" ; \ + echo " */" ; \ + echo "static unsigned int $*_code[] " \ + "__attribute__((__aligned__(128))) = {" ; \ + hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \ + echo "};" ; \ + ) > $@ +quiet_cmd_hexdump = HEXDUMP $@ +$(obj)/%_dump.h: $(obj)/%.bin + $(call if_changed,hexdump) diff --git a/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c b/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c new file mode 100644 index 000000000..6e8a9ef85 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -0,0 +1,413 @@ +/* backing_ops.c - query/set operations on saved SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * These register operations allow SPUFS to operate on saved + * SPU contexts rather than hardware. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/poll.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/spu_info.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +/* + * Reads/writes to various problem and priv2 registers require + * state changes, i.e. generate SPU events, modify channel + * counts, etc. + */ + +static void gen_spu_event(struct spu_context *ctx, u32 event) +{ + u64 ch0_cnt; + u64 ch0_data; + u64 ch1_data; + + ch0_cnt = ctx->csa.spu_chnlcnt_RW[0]; + ch0_data = ctx->csa.spu_chnldata_RW[0]; + ch1_data = ctx->csa.spu_chnldata_RW[1]; + ctx->csa.spu_chnldata_RW[0] |= event; + if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) { + ctx->csa.spu_chnlcnt_RW[0] = 1; + } +} + +static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data) +{ + u32 mbox_stat; + int ret = 0; + + spin_lock(&ctx->csa.register_lock); + mbox_stat = ctx->csa.prob.mb_stat_R; + if (mbox_stat & 0x0000ff) { + /* Read the first available word. + * Implementation note: the depth + * of pu_mb_R is currently 1. + */ + *data = ctx->csa.prob.pu_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0x0000ff); + ctx->csa.spu_chnlcnt_RW[28] = 1; + gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_mbox_stat_read(struct spu_context *ctx) +{ + return ctx->csa.prob.mb_stat_R; +} + +static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + int ret; + u32 stat; + + ret = 0; + spin_lock_irq(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_INTR; + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + ret = POLLOUT | POLLWRNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_THRESHOLD_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + } + } + spin_unlock_irq(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.prob.mb_stat_R & 0xff0000) { + /* Read the first available word. + * Implementation note: the depth + * of puint_mb_R is currently 1. + */ + *data = ctx->csa.priv2.puint_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0xff0000); + ctx->csa.spu_chnlcnt_RW[30] = 1; + gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) { + int slot = ctx->csa.spu_chnlcnt_RW[29]; + int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8; + + /* We have space to write wbox_data. + * Implementation note: the depth + * of spu_mb_W is currently 4. + */ + BUG_ON(avail != (4 - slot)); + ctx->csa.spu_mailbox_data[slot] = data; + ctx->csa.spu_chnlcnt_RW[29] = ++slot; + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); + gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_signal1_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[3]; +} + +static void spu_backing_signal1_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x1) + ctx->csa.spu_chnldata_RW[3] |= data; + else + ctx->csa.spu_chnldata_RW[3] = data; + ctx->csa.spu_chnlcnt_RW[3] = 1; + gen_spu_event(ctx, MFC_SIGNAL_1_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static u32 spu_backing_signal2_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[4]; +} + +static void spu_backing_signal2_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x2) + ctx->csa.spu_chnldata_RW[4] |= data; + else + ctx->csa.spu_chnldata_RW[4] = data; + ctx->csa.spu_chnlcnt_RW[4] = 1; + gen_spu_event(ctx, MFC_SIGNAL_2_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 1; + else + tmp &= ~1; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal1_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0); +} + +static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 2; + else + tmp &= ~2; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal2_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0); +} + +static u32 spu_backing_npc_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_npc_RW; +} + +static void spu_backing_npc_write(struct spu_context *ctx, u32 val) +{ + ctx->csa.prob.spu_npc_RW = val; +} + +static u32 spu_backing_status_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_status_R; +} + +static char *spu_backing_get_ls(struct spu_context *ctx) +{ + return ctx->csa.lscsa->ls; +} + +static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val) +{ + ctx->csa.priv2.spu_privcntl_RW = val; +} + +static u32 spu_backing_runcntl_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_runcntl_RW; +} + +static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock(&ctx->csa.register_lock); + ctx->csa.prob.spu_runcntl_RW = val; + if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R &= + ~SPU_STATUS_STOPPED_BY_STOP & + ~SPU_STATUS_STOPPED_BY_HALT & + ~SPU_STATUS_SINGLE_STEP & + ~SPU_STATUS_INVALID_INSTR & + ~SPU_STATUS_INVALID_CH; + ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; + } else { + ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; + } + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_runcntl_stop(struct spu_context *ctx) +{ + spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); +} + +static void spu_backing_master_start(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static void spu_backing_master_stop(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, + u32 mode) +{ + struct spu_problem_collapsed *prob = &ctx->csa.prob; + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + if (prob->dma_querytype_RW) + goto out; + ret = 0; + /* FIXME: what are the side-effects of this? */ + prob->dma_querymask_RW = mask; + prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. + */ + ctx->csa.prob.dma_tagstatus_R &= mask; +out: + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx) +{ + return ctx->csa.prob.dma_tagstatus_R; +} + +static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx) +{ + return ctx->csa.prob.dma_qstatus_R; +} + +static int spu_backing_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + /* FIXME: set up priv2->puq */ + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static void spu_backing_restart_dma(struct spu_context *ctx) +{ + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; +} + +struct spu_context_ops spu_backing_ops = { + .mbox_read = spu_backing_mbox_read, + .mbox_stat_read = spu_backing_mbox_stat_read, + .mbox_stat_poll = spu_backing_mbox_stat_poll, + .ibox_read = spu_backing_ibox_read, + .wbox_write = spu_backing_wbox_write, + .signal1_read = spu_backing_signal1_read, + .signal1_write = spu_backing_signal1_write, + .signal2_read = spu_backing_signal2_read, + .signal2_write = spu_backing_signal2_write, + .signal1_type_set = spu_backing_signal1_type_set, + .signal1_type_get = spu_backing_signal1_type_get, + .signal2_type_set = spu_backing_signal2_type_set, + .signal2_type_get = spu_backing_signal2_type_get, + .npc_read = spu_backing_npc_read, + .npc_write = spu_backing_npc_write, + .status_read = spu_backing_status_read, + .get_ls = spu_backing_get_ls, + .privcntl_write = spu_backing_privcntl_write, + .runcntl_read = spu_backing_runcntl_read, + .runcntl_write = spu_backing_runcntl_write, + .runcntl_stop = spu_backing_runcntl_stop, + .master_start = spu_backing_master_start, + .master_stop = spu_backing_master_stop, + .set_mfc_query = spu_backing_set_mfc_query, + .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, + .get_mfc_free_elements = spu_backing_get_mfc_free_elements, + .send_mfc_command = spu_backing_send_mfc_command, + .restart_dma = spu_backing_restart_dma, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spufs/context.c b/kernel/arch/powerpc/platforms/cell/spufs/context.c new file mode 100644 index 000000000..3b4152fae --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/context.c @@ -0,0 +1,186 @@ +/* + * SPU file system -- SPU context management + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <linux/sched.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include "spufs.h" +#include "sputrace.h" + + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + +struct spu_context *alloc_spu_context(struct spu_gang *gang) +{ + struct spu_context *ctx; + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + goto out; + /* Binding to physical processor deferred + * until spu_activate(). + */ + if (spu_init_csa(&ctx->csa)) + goto out_free; + spin_lock_init(&ctx->mmio_lock); + mutex_init(&ctx->mapping_lock); + kref_init(&ctx->kref); + mutex_init(&ctx->state_mutex); + mutex_init(&ctx->run_mutex); + init_waitqueue_head(&ctx->ibox_wq); + init_waitqueue_head(&ctx->wbox_wq); + init_waitqueue_head(&ctx->stop_wq); + init_waitqueue_head(&ctx->mfc_wq); + init_waitqueue_head(&ctx->run_wq); + ctx->state = SPU_STATE_SAVED; + ctx->ops = &spu_backing_ops; + ctx->owner = get_task_mm(current); + INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); + if (gang) + spu_gang_add_ctx(gang, ctx); + + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + ctx->stats.tstamp = ktime_get_ns(); + + atomic_inc(&nr_spu_contexts); + goto out; +out_free: + kfree(ctx); + ctx = NULL; +out: + return ctx; +} + +void destroy_spu_context(struct kref *kref) +{ + struct spu_context *ctx; + ctx = container_of(kref, struct spu_context, kref); + spu_context_nospu_trace(destroy_spu_context__enter, ctx); + mutex_lock(&ctx->state_mutex); + spu_deactivate(ctx); + mutex_unlock(&ctx->state_mutex); + spu_fini_csa(&ctx->csa); + if (ctx->gang) + spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); + BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); + kfree(ctx->switch_log); + kfree(ctx); +} + +struct spu_context * get_spu_context(struct spu_context *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} + +int put_spu_context(struct spu_context *ctx) +{ + return kref_put(&ctx->kref, &destroy_spu_context); +} + +/* give up the mm reference when the context is about to be destroyed */ +void spu_forget(struct spu_context *ctx) +{ + struct mm_struct *mm; + + /* + * This is basically an open-coded spu_acquire_saved, except that + * we don't acquire the state mutex interruptible, and we don't + * want this context to be rescheduled on release. + */ + mutex_lock(&ctx->state_mutex); + if (ctx->state != SPU_STATE_SAVED) + spu_deactivate(ctx); + + mm = ctx->owner; + ctx->owner = NULL; + mmput(mm); + spu_release(ctx); +} + +void spu_unmap_mappings(struct spu_context *ctx) +{ + mutex_lock(&ctx->mapping_lock); + if (ctx->local_store) + unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + if (ctx->mfc) + unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1); + if (ctx->cntl) + unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1); + if (ctx->signal1) + unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->signal2) + unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->mss) + unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1); + if (ctx->psmap) + unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1); + mutex_unlock(&ctx->mapping_lock); +} + +/** + * spu_acquire_saved - lock spu contex and make sure it is in saved state + * @ctx: spu contex to lock + */ +int spu_acquire_saved(struct spu_context *ctx) +{ + int ret; + + spu_context_nospu_trace(spu_acquire_saved__enter, ctx); + + ret = spu_acquire(ctx); + if (ret) + return ret; + + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); + spu_deactivate(ctx); + } + + return 0; +} + +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) +{ + BUG_ON(ctx->state != SPU_STATE_SAVED); + + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) && + test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_activate(ctx, 0); + + spu_release(ctx); +} + diff --git a/kernel/arch/powerpc/platforms/cell/spufs/coredump.c b/kernel/arch/powerpc/platforms/cell/spufs/coredump.c new file mode 100644 index 000000000..be6212ddb --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/coredump.c @@ -0,0 +1,211 @@ +/* + * SPU core dump code + * + * (C) Copyright 2006 IBM Corp. + * + * Author: Dwayne Grant McConnell <decimal@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/elf.h> +#include <linux/file.h> +#include <linux/fdtable.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/coredump.h> +#include <linux/binfmts.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, + size_t size, loff_t *off) +{ + u64 data; + int ret; + + if (spufs_coredump_read[num].read) + return spufs_coredump_read[num].read(ctx, buffer, size, off); + + data = spufs_coredump_read[num].get(ctx); + ret = snprintf(buffer, size, "0x%.16llx", data); + if (ret >= size) + return size; + return ++ret; /* count trailing NULL */ +} + +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) +{ + int i, sz, total = 0; + char *name; + char fullname[80]; + + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + + total += sizeof(struct elf_note); + total += roundup(strlen(fullname) + 1, 4); + total += roundup(sz, 4); + } + + return total; +} + +static int match_context(const void *v, struct file *file, unsigned fd) +{ + struct spu_context *ctx; + if (file->f_op != &spufs_context_fops) + return 0; + ctx = SPUFS_I(file_inode(file))->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + return fd + 1; +} + +/* + * The additional architecture-specific notes for Cell are various + * context files in the spu context. + * + * This function iterates over all open file descriptors and sees + * if they are a directory in spufs. In that case we use spufs + * internal functionality to dump them without needing to actually + * open the files. + */ +/* + * descriptor table is not shared, so files can't change or go away. + */ +static struct spu_context *coredump_next_context(int *fd) +{ + struct file *file; + int n = iterate_fd(current->files, *fd, match_context, NULL); + if (!n) + return NULL; + *fd = n - 1; + file = fcheck(*fd); + return SPUFS_I(file_inode(file))->i_ctx; +} + +int spufs_coredump_extra_notes_size(void) +{ + struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + break; + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) + break; + + size += rc; + + /* start searching the next fd next time */ + fd++; + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct coredump_params *cprm, int dfd) +{ + loff_t pos = 0; + int sz, rc, total = 0; + const int bufsz = PAGE_SIZE; + char *name; + char fullname[80], *buf; + struct elf_note en; + + buf = (void *)get_zeroed_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + en.n_namesz = strlen(fullname) + 1; + en.n_descsz = sz; + en.n_type = NT_SPU; + + if (!dump_emit(cprm, &en, sizeof(en))) + goto Eio; + + if (!dump_emit(cprm, fullname, en.n_namesz)) + goto Eio; + + if (!dump_align(cprm, 4)) + goto Eio; + + do { + rc = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (rc > 0) { + if (!dump_emit(cprm, buf, rc)) + goto Eio; + total += rc; + } + } while (rc == bufsz && total < sz); + + if (rc < 0) + goto out; + + if (!dump_skip(cprm, + roundup(cprm->written - total + sz, 4) - cprm->written)) + goto Eio; +out: + free_page((unsigned long)buf); + return rc; +Eio: + free_page((unsigned long)buf); + return -EIO; +} + +int spufs_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + return rc; + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, cprm, fd); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); + + /* start searching the next fd next time */ + fd++; + } + + return 0; +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/fault.c b/kernel/arch/powerpc/platforms/cell/spufs/fault.c new file mode 100644 index 000000000..d98f845ac --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/fault.c @@ -0,0 +1,191 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +#include "spufs.h" + +/** + * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag. + * + * If the context was created with events, we just set the return event. + * Otherwise, send an appropriate signal to the process. + */ +static void spufs_handle_event(struct spu_context *ctx, + unsigned long ea, int type) +{ + siginfo_t info; + + if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { + ctx->event_return |= type; + wake_up_all(&ctx->stop_wq); + return; + } + + memset(&info, 0, sizeof(info)); + + switch (type) { + case SPE_EVENT_INVALID_DMA: + info.si_signo = SIGBUS; + info.si_code = BUS_OBJERR; + break; + case SPE_EVENT_SPE_DATA_STORAGE: + info.si_signo = SIGSEGV; + info.si_addr = (void __user *)ea; + info.si_code = SEGV_ACCERR; + ctx->ops->restart_dma(ctx); + break; + case SPE_EVENT_DMA_ALIGNMENT: + info.si_signo = SIGBUS; + /* DAR isn't set for an alignment fault :( */ + info.si_code = BUS_ADRALN; + break; + case SPE_EVENT_SPE_ERROR: + info.si_signo = SIGILL; + info.si_addr = (void __user *)(unsigned long) + ctx->ops->npc_read(ctx) - 4; + info.si_code = ILL_ILLOPC; + break; + } + + if (info.si_signo) + force_sig_info(info.si_signo, &info, current); +} + +int spufs_handle_class0(struct spu_context *ctx) +{ + unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK; + + if (likely(!stat)) + return 0; + + if (stat & CLASS0_DMA_ALIGNMENT_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_DMA_ALIGNMENT); + + if (stat & CLASS0_INVALID_DMA_COMMAND_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_INVALID_DMA); + + if (stat & CLASS0_SPU_ERROR_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_SPE_ERROR); + + ctx->csa.class_0_pending = 0; + + return -EIO; +} + +/* + * bottom half handler for page faults, we can't do this from + * interrupt context, since we might need to sleep. + * we also need to give up the mutex so we can get scheduled + * out while waiting for the backing store. + * + * TODO: try calling hash_page from the interrupt handler first + * in order to speed up the easy case. + */ +int spufs_handle_class1(struct spu_context *ctx) +{ + u64 ea, dsisr, access; + unsigned long flags; + unsigned flt = 0; + int ret; + + /* + * dar and dsisr get passed from the registers + * to the spu_context, to this function, but not + * back to the spu if it gets scheduled again. + * + * if we don't handle the fault for a saved context + * in time, we can still expect to get the same fault + * the immediately after the context restore. + */ + ea = ctx->csa.class_1_dar; + dsisr = ctx->csa.class_1_dsisr; + + if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) + return 0; + + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); + + pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea, + dsisr, ctx->state); + + ctx->stats.hash_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) + ctx->spu->stats.hash_flt++; + + /* we must not hold the lock when entering copro_handle_mm_fault */ + spu_release(ctx); + + access = (_PAGE_PRESENT | _PAGE_USER); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; + local_irq_save(flags); + ret = hash_page(ea, access, 0x300, dsisr); + local_irq_restore(flags); + + /* hashing failed, so try the actual fault handler */ + if (ret) + ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt); + + /* + * This is nasty: we need the state_mutex for all the bookkeeping even + * if the syscall was interrupted by a signal. ewww. + */ + mutex_lock(&ctx->state_mutex); + + /* + * Clear dsisr under ctxt lock after handling the fault, so that + * time slicing will not preempt the context while the page fault + * handler is running. Context switch code removes mappings. + */ + ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0; + + /* + * If we handled the fault successfully and are in runnable + * state, restart the DMA. + * In case of unhandled error report the problem to user space. + */ + if (!ret) { + if (flt & VM_FAULT_MAJOR) + ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt & VM_FAULT_MAJOR) + ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; + } + + if (ctx->spu) + ctx->ops->restart_dma(ctx); + } else + spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + return ret; +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/file.c b/kernel/arch/powerpc/platforms/cell/spufs/file.c new file mode 100644 index 000000000..d966bbe58 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/file.c @@ -0,0 +1,2771 @@ +/* + * SPU file system -- file contents + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/fs.h> +#include <linux/ioctl.h> +#include <linux/export.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/ptrace.h> +#include <linux/seq_file.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/time.h> +#include <asm/spu.h> +#include <asm/spu_info.h> +#include <asm/uaccess.h> + +#include "spufs.h" +#include "sputrace.h" + +#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + +/* Simple attribute files */ +struct spufs_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static int spufs_attr_open(struct inode *inode, struct file *file, + int (*get)(void *, u64 *), int (*set)(void *, u64), + const char *fmt) +{ + struct spufs_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->i_private; + attr->fmt = fmt; + mutex_init(&attr->mutex); + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +static int spufs_attr_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t spufs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->get) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ + size = strlen(attr->get_buf); + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, (unsigned long long)val); + } + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return spufs_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = spufs_attr_release, \ + .read = spufs_attr_read, \ + .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ +}; + + +static int +spufs_mem_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->local_store = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static int +spufs_mem_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->local_store = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t +__spufs_mem_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + char *local_store = ctx->ops->get_ls(ctx); + return simple_read_from_buffer(buffer, size, pos, local_store, + LS_SIZE); +} + +static ssize_t +spufs_mem_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ret = __spufs_mem_read(ctx, buffer, size, pos); + spu_release(ctx); + + return ret; +} + +static ssize_t +spufs_mem_write(struct file *file, const char __user *buffer, + size_t size, loff_t *ppos) +{ + struct spu_context *ctx = file->private_data; + char *local_store; + loff_t pos = *ppos; + int ret; + + if (pos > LS_SIZE) + return -EFBIG; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + local_store = ctx->ops->get_ls(ctx); + size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size); + spu_release(ctx); + + return size; +} + +static int +spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long address = (unsigned long)vmf->virtual_address; + unsigned long pfn, offset; + +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_state *csa = &ctx->csa; + int psize; + + /* Check what page size we are using */ + psize = get_slice_psize(vma->vm_mm, address); + + /* Some sanity checking */ + BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); + + /* Wow, 64K, cool, we need to align the address though */ + if (csa->use_big_pages) { + BUG_ON(vma->vm_start & 0xffff); + address &= ~0xfffful; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + + offset = vmf->pgoff << PAGE_SHIFT; + if (offset >= LS_SIZE) + return VM_FAULT_SIGBUS; + + pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", + address, offset); + + if (spu_acquire(ctx)) + return VM_FAULT_NOPAGE; + + if (ctx->state == SPU_STATE_SAVED) { + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset); + } else { + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; + } + vm_insert_pfn(vma, address, pfn); + + spu_release(ctx); + + return VM_FAULT_NOPAGE; +} + +static int spufs_mem_mmap_access(struct vm_area_struct *vma, + unsigned long address, + void *buf, int len, int write) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + char *local_store; + + if (write && !(vma->vm_flags & VM_WRITE)) + return -EACCES; + if (spu_acquire(ctx)) + return -EINTR; + if ((offset + len) > vma->vm_end) + len = vma->vm_end - offset; + local_store = ctx->ops->get_ls(ctx); + if (write) + memcpy_toio(local_store + offset, buf, len); + else + memcpy_fromio(buf, local_store + offset, len); + spu_release(ctx); + return len; +} + +static const struct vm_operations_struct spufs_mem_mmap_vmops = { + .fault = spufs_mem_mmap_fault, + .access = spufs_mem_mmap_access, +}; + +static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +{ +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* Sanity check VMA alignment */ + if (csa->use_big_pages) { + pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," + " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, + vma->vm_pgoff); + if (vma->vm_start & 0xffff) + return -EINVAL; + if (vma->vm_pgoff & 0xf) + return -EINVAL; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + + vma->vm_ops = &spufs_mem_mmap_vmops; + return 0; +} + +#ifdef CONFIG_SPU_FS_64K_LS +static unsigned long spufs_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* If not using big pages, fallback to normal MM g_u_a */ + if (!csa->use_big_pages) + return current->mm->get_unmapped_area(file, addr, len, + pgoff, flags); + + /* Else, try to obtain a 64K pages slice */ + return slice_get_unmapped_area(addr, len, flags, + MMU_PAGE_64K, 1); +} +#endif /* CONFIG_SPU_FS_64K_LS */ + +static const struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .release = spufs_mem_release, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, + .mmap = spufs_mem_mmap, +#ifdef CONFIG_SPU_FS_64K_LS + .get_unmapped_area = spufs_get_unmapped_area, +#endif +}; + +static int spufs_ps_fault(struct vm_area_struct *vma, + struct vm_fault *vmf, + unsigned long ps_offs, + unsigned long ps_size) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long area, offset = vmf->pgoff << PAGE_SHIFT; + int ret = 0; + + spu_context_nospu_trace(spufs_ps_fault__enter, ctx); + + if (offset >= ps_size) + return VM_FAULT_SIGBUS; + + if (fatal_signal_pending(current)) + return VM_FAULT_SIGBUS; + + /* + * Because we release the mmap_sem, the context may be destroyed while + * we're in spu_wait. Grab an extra reference so it isn't destroyed + * in the meantime. + */ + get_spu_context(ctx); + + /* + * We have to wait for context to be loaded before we have + * pages to hand out to the user, but we don't want to wait + * with the mmap_sem held. + * It is possible to drop the mmap_sem here, but then we need + * to return VM_FAULT_NOPAGE because the mappings may have + * hanged. + */ + if (spu_acquire(ctx)) + goto refault; + + if (ctx->state == SPU_STATE_SAVED) { + up_read(¤t->mm->mmap_sem); + spu_context_nospu_trace(spufs_ps_fault__sleep, ctx); + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu); + down_read(¤t->mm->mmap_sem); + } else { + area = ctx->spu->problem_phys + ps_offs; + vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + (area + offset) >> PAGE_SHIFT); + spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); + } + + if (!ret) + spu_release(ctx); + +refault: + put_spu_context(ctx); + return VM_FAULT_NOPAGE; +} + +#if SPUFS_MMAP_4K +static int spufs_cntl_mmap_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_cntl_mmap_vmops = { + .fault = spufs_cntl_mmap_fault, +}; + +/* + * mmap support for problem state control area [0x4000 - 0x4fff]. + */ +static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_cntl_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_cntl_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_cntl_get(void *data, u64 *val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + *val = ctx->ops->status_read(ctx); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->runcntl_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->cntl = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return simple_attr_open(inode, file, spufs_cntl_get, + spufs_cntl_set, "0x%08lx"); +} + +static int +spufs_cntl_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + simple_attr_release(inode, file); + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->cntl = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_cntl_fops = { + .open = spufs_cntl_open, + .release = spufs_cntl_release, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = generic_file_llseek, + .mmap = spufs_cntl_mmap, +}; + +static int +spufs_regs_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + return 0; +} + +static ssize_t +__spufs_regs_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + lscsa->gprs, sizeof lscsa->gprs); +} + +static ssize_t +spufs_regs_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + /* pre-check for file position: if we'd return EOF, there's no point + * causing a deschedule */ + if (*pos >= sizeof(ctx->csa.lscsa->gprs)) + return 0; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_regs_read(ctx, buffer, size, pos); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_regs_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->gprs)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_regs_fops = { + .open = spufs_regs_open, + .read = spufs_regs_read, + .write = spufs_regs_write, + .llseek = generic_file_llseek, +}; + +static ssize_t +__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + &lscsa->fpcr, sizeof(lscsa->fpcr)); +} + +static ssize_t +spufs_fpcr_read(struct file *file, char __user * buffer, + size_t size, loff_t * pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_fpcr_read(ctx, buffer, size, pos); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_fpcr_write(struct file *file, const char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->fpcr)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_fpcr_fops = { + .open = spufs_regs_open, + .read = spufs_fpcr_read, + .write = spufs_fpcr_write, + .llseek = generic_file_llseek, +}; + +/* generic open function for all pipe-like files */ +static int spufs_pipe_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + + return nonseekable_open(inode, file); +} + +/* + * Read as many bytes from the mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + */ +static ssize_t spufs_mbox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 mbox_data, __user *udata; + ssize_t count; + + if (len < 4) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + return count; + + for (count = 0; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->mbox_read(ctx, &mbox_data); + if (ret == 0) + break; + + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = __put_user(mbox_data, udata); + if (ret) { + if (!count) + count = -EFAULT; + break; + } + } + spu_release(ctx); + + if (!count) + count = -EAGAIN; + + return count; +} + +static const struct file_operations spufs_mbox_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_read, + .llseek = no_llseek, +}; + +static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 mbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; + + spu_release(ctx); + + if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_mbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_stat_read, + .llseek = no_llseek, +}; + +/* low-level ibox access function */ +size_t spu_ibox_read(struct spu_context *ctx, u32 *data) +{ + return ctx->ops->ibox_read(ctx, data); +} + +static int spufs_ibox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->ibox_fasync); +} + +/* interrupt-level ibox callback function. */ +void spufs_ibox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (!ctx) + return; + + wake_up_all(&ctx->ibox_wq); + kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); +} + +/* + * Read as many bytes from the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * any data is available, but return when we have been able to + * read something. + */ +static ssize_t spufs_ibox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 ibox_data, __user *udata; + ssize_t count; + + if (len < 4) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* wait only for the first element */ + count = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_ibox_read(ctx, &ibox_data)) { + count = -EAGAIN; + goto out_unlock; + } + } else { + count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + if (count) + goto out; + } + + /* if we can't write at all, return -EFAULT */ + count = __put_user(ibox_data, udata); + if (count) + goto out_unlock; + + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->ibox_read(ctx, &ibox_data); + if (ret == 0) + break; + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = __put_user(ibox_data, udata); + if (ret) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; +} + +static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + unsigned int mask; + + poll_wait(file, &ctx->ibox_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_ibox_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_read, + .poll = spufs_ibox_poll, + .fasync = spufs_ibox_fasync, + .llseek = no_llseek, +}; + +static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 ibox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_ibox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_stat_read, + .llseek = no_llseek, +}; + +/* low-level mailbox write */ +size_t spu_wbox_write(struct spu_context *ctx, u32 data) +{ + return ctx->ops->wbox_write(ctx, data); +} + +static int spufs_wbox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = fasync_helper(fd, file, on, &ctx->wbox_fasync); + + return ret; +} + +/* interrupt-level wbox callback function. */ +void spufs_wbox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (!ctx) + return; + + wake_up_all(&ctx->wbox_wq); + kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); +} + +/* + * Write as many bytes to the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - the mailbox is full + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * space is availabyl, but return when we have been able to + * write something. + */ +static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 wbox_data, __user *udata; + ssize_t count; + + if (len < 4) + return -EINVAL; + + udata = (void __user *)buf; + if (!access_ok(VERIFY_READ, buf, len)) + return -EFAULT; + + if (__get_user(wbox_data, udata)) + return -EFAULT; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* + * make sure we can at least write one element, by waiting + * in case of !O_NONBLOCK + */ + count = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_wbox_write(ctx, wbox_data)) { + count = -EAGAIN; + goto out_unlock; + } + } else { + count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + if (count) + goto out; + } + + + /* write as much as possible */ + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = __get_user(wbox_data, udata); + if (ret) + break; + + ret = spu_wbox_write(ctx, wbox_data); + if (ret == 0) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; +} + +static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + unsigned int mask; + + poll_wait(file, &ctx->wbox_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_wbox_fops = { + .open = spufs_pipe_open, + .write = spufs_wbox_write, + .poll = spufs_wbox_poll, + .fasync = spufs_wbox_fasync, + .llseek = no_llseek, +}; + +static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 wbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_wbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_wbox_stat_read, + .llseek = no_llseek, +}; + +static int spufs_signal1_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal1 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal1_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal1 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, + size_t len, loff_t *pos) +{ + int ret = 0; + u32 data; + + if (len < 4) + return -EINVAL; + + if (ctx->csa.spu_chnlcnt_RW[3]) { + data = ctx->csa.spu_chnldata_RW[3]; + ret = 4; + } + + if (!ret) + goto out; + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + +out: + return ret; +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal1_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_write(ctx, data); + spu_release(ctx); + + return 4; +} + +static int +spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal1_mmap_vmops = { + .fault = spufs_signal1_mmap_fault, +}; + +static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal1_mmap_vmops; + return 0; +} + +static const struct file_operations spufs_signal1_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .read = spufs_signal1_read, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static int spufs_signal2_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal2 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal2_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal2 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, + size_t len, loff_t *pos) +{ + int ret = 0; + u32 data; + + if (len < 4) + return -EINVAL; + + if (ctx->csa.spu_chnlcnt_RW[4]) { + data = ctx->csa.spu_chnldata_RW[4]; + ret = 4; + } + + if (!ret) + goto out; + + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + +out: + return ret; +} + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal2_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_write(ctx, data); + spu_release(ctx); + + return 4; +} + +#if SPUFS_MMAP_4K +static int +spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal2_mmap_vmops = { + .fault = spufs_signal2_mmap_fault, +}; + +static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal2_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_signal2_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static const struct file_operations spufs_signal2_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .read = spufs_signal2_read, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. + */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static int __##__get(void *data, u64 *val) \ +{ \ + struct spu_context *ctx = data; \ + int ret = 0; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + ret = spu_acquire(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + ret = spu_acquire_saved(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + *val = __get(ctx); \ + \ + return 0; \ +} \ +DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + +static int spufs_signal1_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal1_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal1_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + + +static int spufs_signal2_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal2_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal2_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + +#if SPUFS_MMAP_4K +static int +spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mss_mmap_vmops = { + .fault = spufs_mss_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. + */ +static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mss_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mss_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mss_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + file->private_data = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!i->i_openers++) + ctx->mss = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_mss_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mss = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_mss_fops = { + .open = spufs_mss_open, + .release = spufs_mss_release, + .mmap = spufs_mss_mmap, + .llseek = no_llseek, +}; + +static int +spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_psmap_mmap_vmops = { + .fault = spufs_psmap_mmap_fault, +}; + +/* + * mmap support for full problem state area [0x00000 - 0x1ffff]. + */ +static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_psmap_mmap_vmops; + return 0; +} + +static int spufs_psmap_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = i->i_ctx; + if (!i->i_openers++) + ctx->psmap = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_psmap_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->psmap = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_psmap_fops = { + .open = spufs_psmap_open, + .release = spufs_psmap_release, + .mmap = spufs_psmap_mmap, + .llseek = no_llseek, +}; + + +#if SPUFS_MMAP_4K +static int +spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mfc_mmap_vmops = { + .fault = spufs_mfc_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. + */ +static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mfc_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mfc_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mfc_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + /* we don't want to deal with DMA into other processes */ + if (ctx->owner != current->mm) + return -EINVAL; + + if (atomic_read(&inode->i_count) != 1) + return -EBUSY; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->mfc = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_mfc_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mfc = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +/* interrupt-level mfc callback function. */ +void spufs_mfc_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (!ctx) + return; + + wake_up_all(&ctx->mfc_wq); + + pr_debug("%s %s\n", __func__, spu->name); + if (ctx->mfc_fasync) { + u32 free_elements, tagstatus; + unsigned int mask; + + /* no need for spu_acquire in interrupt context */ + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT; + if (tagstatus & ctx->tagwait) + mask |= POLLIN; + + kill_fasync(&ctx->mfc_fasync, SIGIO, mask); + } +} + +static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status) +{ + /* See if there is one tag group is complete */ + /* FIXME we need locking around tagwait */ + *status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait; + ctx->tagwait &= ~*status; + if (*status) + return 1; + + /* enable interrupt waiting for any tag group, + may silently fail if interrupts are already enabled */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + return 0; +} + +static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret = -EINVAL; + u32 status; + + if (size != 4) + goto out; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + ret = -EINVAL; + if (file->f_flags & O_NONBLOCK) { + status = ctx->ops->read_mfc_tagstatus(ctx); + if (!(status & ctx->tagwait)) + ret = -EAGAIN; + else + /* XXX(hch): shouldn't we clear ret here? */ + ctx->tagwait &= ~status; + } else { + ret = spufs_wait(ctx->mfc_wq, + spufs_read_mfc_tagstatus(ctx, &status)); + if (ret) + goto out; + } + spu_release(ctx); + + ret = 4; + if (copy_to_user(buffer, &status, 4)) + ret = -EFAULT; + +out: + return ret; +} + +static int spufs_check_valid_dma(struct mfc_dma_command *cmd) +{ + pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa, + cmd->ea, cmd->size, cmd->tag, cmd->cmd); + + switch (cmd->cmd) { + case MFC_PUT_CMD: + case MFC_PUTF_CMD: + case MFC_PUTB_CMD: + case MFC_GET_CMD: + case MFC_GETF_CMD: + case MFC_GETB_CMD: + break; + default: + pr_debug("invalid DMA opcode %x\n", cmd->cmd); + return -EIO; + } + + if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) { + pr_debug("invalid DMA alignment, ea %llx lsa %x\n", + cmd->ea, cmd->lsa); + return -EIO; + } + + switch (cmd->size & 0xf) { + case 1: + break; + case 2: + if (cmd->lsa & 1) + goto error; + break; + case 4: + if (cmd->lsa & 3) + goto error; + break; + case 8: + if (cmd->lsa & 7) + goto error; + break; + case 0: + if (cmd->lsa & 15) + goto error; + break; + error: + default: + pr_debug("invalid DMA alignment %x for size %x\n", + cmd->lsa & 0xf, cmd->size); + return -EIO; + } + + if (cmd->size > 16 * 1024) { + pr_debug("invalid DMA size %x\n", cmd->size); + return -EIO; + } + + if (cmd->tag & 0xfff0) { + /* we reserve the higher tag numbers for kernel use */ + pr_debug("invalid DMA tag\n"); + return -EIO; + } + + if (cmd->class) { + /* not supported in this version */ + pr_debug("invalid DMA class\n"); + return -EIO; + } + + return 0; +} + +static int spu_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command cmd, + int *error) +{ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) { + /* wait for any tag group to complete + so we have space for the new command */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + /* try again, because the queue might be + empty again */ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) + return 0; + } + return 1; +} + +static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct mfc_dma_command cmd; + int ret = -EINVAL; + + if (size != sizeof cmd) + goto out; + + ret = -EFAULT; + if (copy_from_user(&cmd, buffer, sizeof cmd)) + goto out; + + ret = spufs_check_valid_dma(&cmd); + if (ret) + goto out; + + ret = spu_acquire(ctx); + if (ret) + goto out; + + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + if (ret) + goto out; + + if (file->f_flags & O_NONBLOCK) { + ret = ctx->ops->send_mfc_command(ctx, &cmd); + } else { + int status; + ret = spufs_wait(ctx->mfc_wq, + spu_send_mfc_command(ctx, cmd, &status)); + if (ret) + goto out; + if (status) + ret = status; + } + + if (ret) + goto out_unlock; + + ctx->tagwait |= 1 << cmd.tag; + ret = size; + +out_unlock: + spu_release(ctx); +out: + return ret; +} + +static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + u32 free_elements, tagstatus; + unsigned int mask; + + poll_wait(file, &ctx->mfc_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + spu_release(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT | POLLWRNORM; + if (tagstatus & ctx->tagwait) + mask |= POLLIN | POLLRDNORM; + + pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__, + free_elements, tagstatus, ctx->tagwait); + + return mask; +} + +static int spufs_mfc_flush(struct file *file, fl_owner_t id) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + goto out; +#if 0 +/* this currently hangs */ + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); + if (ret) + goto out; + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); + if (ret) + goto out; +#else + ret = 0; +#endif + spu_release(ctx); +out: + return ret; +} + +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file_inode(file); + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = spufs_mfc_flush(file, NULL); + mutex_unlock(&inode->i_mutex); + } + return err; +} + +static int spufs_mfc_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->mfc_fasync); +} + +static const struct file_operations spufs_mfc_fops = { + .open = spufs_mfc_open, + .release = spufs_mfc_release, + .read = spufs_mfc_read, + .write = spufs_mfc_write, + .poll = spufs_mfc_poll, + .flush = spufs_mfc_flush, + .fsync = spufs_mfc_fsync, + .fasync = spufs_mfc_fasync, + .mmap = spufs_mfc_mmap, + .llseek = no_llseek, +}; + +static int spufs_npc_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->npc_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_npc_get(struct spu_context *ctx) +{ + return ctx->ops->npc_read(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); + +static int spufs_decr_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->decr.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_decr_status_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_status_get(struct spu_context *ctx) +{ + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_event_mask_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->event_mask.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_event_mask_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->event_mask.slot[0]; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static u64 spufs_event_status_get(struct spu_context *ctx) +{ + struct spu_state *state = &ctx->csa; + u64 stat; + stat = state->spu_chnlcnt_RW[0]; + if (stat) + return state->spu_chnldata_RW[0]; + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static int spufs_srr0_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->srr0.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_srr0_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->srr0.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static u64 spufs_id_get(struct spu_context *ctx) +{ + u64 num; + + if (ctx->state == SPU_STATE_RUNNABLE) + num = ctx->spu->number; + else + num = (unsigned int)-1; + + return num; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) + +static u64 spufs_object_id_get(struct spu_context *ctx) +{ + /* FIXME: Should there really be no locking here? */ + return ctx->object_id; +} + +static int spufs_object_id_set(void *data, u64 id) +{ + struct spu_context *ctx = data; + ctx->object_id = id; + + return 0; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); + +static u64 spufs_lslr_get(struct spu_context *ctx) +{ + return ctx->csa.priv2.spu_lslr_RW; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_info_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + return 0; +} + +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the mbox */ + if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) + return 0; + + data = ctx->csa.prob.pu_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_mbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_mbox_info_fops = { + .open = spufs_info_open, + .read = spufs_mbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the ibox */ + if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) + return 0; + + data = ctx->csa.priv2.puint_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_ibox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_ibox_info_fops = { + .open = spufs_info_open, + .read = spufs_ibox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + int i, cnt; + u32 data[4]; + u32 wbox_stat; + + wbox_stat = ctx->csa.prob.mb_stat_R; + cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + for (i = 0; i < cnt; i++) { + data[i] = ctx->csa.spu_mailbox_data[i]; + } + + return simple_read_from_buffer(buf, len, pos, &data, + cnt * sizeof(u32)); +} + +static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_wbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_wbox_info_fops = { + .open = spufs_info_open, + .read = spufs_wbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + struct mfc_cq_sr *qp, *spuqp; + int i; + + info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + for (i = 0; i < 16; i++) { + qp = &info.dma_info_command_data[i]; + spuqp = &ctx->csa.priv2.spuq[i]; + + qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_dma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_dma_info_fops = { + .open = spufs_info_open, + .read = spufs_dma_info_read, + .llseek = no_llseek, +}; + +static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_proxydma_info info; + struct mfc_cq_sr *qp, *puqp; + int ret = sizeof info; + int i; + + if (len < ret) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + for (i = 0; i < 8; i++) { + qp = &info.proxydma_info_command_data[i]; + puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_proxydma_info_fops = { + .open = spufs_info_open, + .read = spufs_proxydma_info_read, + .llseek = no_llseek, +}; + +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spu_utilization_state state) +{ + unsigned long long time = ctx->stats.times[state]; + + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. + */ + if (ctx->spu && ctx->stats.util_state == state) { + time += ktime_get_ns() - ctx->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static inline int spufs_switch_log_used(struct spu_context *ctx) +{ + return (ctx->switch_log->head - ctx->switch_log->tail) % + SWITCH_LOG_BUFSIZE; +} + +static inline int spufs_switch_log_avail(struct spu_context *ctx) +{ + return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx); +} + +static int spufs_switch_log_open(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (ctx->switch_log) { + rc = -EBUSY; + goto out; + } + + ctx->switch_log = kmalloc(sizeof(struct switch_log) + + SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry), + GFP_KERNEL); + + if (!ctx->switch_log) { + rc = -ENOMEM; + goto out; + } + + ctx->switch_log->head = ctx->switch_log->tail = 0; + init_waitqueue_head(&ctx->switch_log->wait); + rc = 0; + +out: + spu_release(ctx); + return rc; +} + +static int spufs_switch_log_release(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + kfree(ctx->switch_log); + ctx->switch_log = NULL; + spu_release(ctx); + + return 0; +} + +static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) +{ + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; + + return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", + (unsigned int) p->tstamp.tv_sec, + (unsigned int) p->tstamp.tv_nsec, + p->spu_id, + (unsigned int) p->type, + (unsigned int) p->val, + (unsigned long long) p->timebase); +} + +static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int error = 0, cnt = 0; + + if (!buf) + return -EINVAL; + + error = spu_acquire(ctx); + if (error) + return error; + + while (cnt < len) { + char tbuf[128]; + int width; + + if (spufs_switch_log_used(ctx) == 0) { + if (cnt > 0) { + /* If there's data ready to go, we can + * just return straight away */ + break; + + } else if (file->f_flags & O_NONBLOCK) { + error = -EAGAIN; + break; + + } else { + /* spufs_wait will drop the mutex and + * re-acquire, but since we're in read(), the + * file cannot be _released (and so + * ctx->switch_log is stable). + */ + error = spufs_wait(ctx->switch_log->wait, + spufs_switch_log_used(ctx) > 0); + + /* On error, spufs_wait returns without the + * state mutex held */ + if (error) + return error; + + /* We may have had entries read from underneath + * us while we dropped the mutex in spufs_wait, + * so re-check */ + if (spufs_switch_log_used(ctx) == 0) + continue; + } + } + + width = switch_log_sprint(ctx, tbuf, sizeof(tbuf)); + if (width < len) + ctx->switch_log->tail = + (ctx->switch_log->tail + 1) % + SWITCH_LOG_BUFSIZE; + else + /* If the record is greater than space available return + * partial buffer (so far) */ + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } + + spu_release(ctx); + + return cnt == 0 ? error : cnt; +} + +static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + unsigned int mask = 0; + int rc; + + poll_wait(file, &ctx->switch_log->wait, wait); + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (spufs_switch_log_used(ctx) > 0) + mask |= POLLIN; + + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_switch_log_fops = { + .open = spufs_switch_log_open, + .read = spufs_switch_log_read, + .poll = spufs_switch_log_poll, + .release = spufs_switch_log_release, + .llseek = no_llseek, +}; + +/** + * Log a context switch event to a switch log reader. + * + * Must be called with ctx->state_mutex held. + */ +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val) +{ + if (!ctx->switch_log) + return; + + if (spufs_switch_log_avail(ctx) > 1) { + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->head; + ktime_get_ts(&p->tstamp); + p->timebase = get_tb(); + p->spu_id = spu ? spu->number : -1; + p->type = type; + p->val = val; + + ctx->switch_log->head = + (ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE; + } + + wake_up(&ctx->switch_log->wait); +} + +static int spufs_show_ctx(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + u64 mfc_control_RW; + + mutex_lock(&ctx->state_mutex); + if (ctx->spu) { + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + spin_lock_irq(&spu->register_lock); + mfc_control_RW = in_be64(&priv2->mfc_control_RW); + spin_unlock_irq(&spu->register_lock); + } else { + struct spu_state *csa = &ctx->csa; + + mfc_control_RW = csa->priv2.mfc_control_RW; + } + + seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)" + " %c %llx %llx %llx %llx %x %x\n", + ctx->state == SPU_STATE_SAVED ? 'S' : 'R', + ctx->flags, + ctx->sched_flags, + ctx->prio, + ctx->time_slice, + ctx->spu ? ctx->spu->number : -1, + !list_empty(&ctx->rq) ? 'q' : ' ', + ctx->csa.class_0_pending, + ctx->csa.class_0_dar, + ctx->csa.class_1_dsisr, + mfc_control_RW, + ctx->ops->runcntl_read(ctx), + ctx->ops->status_read(ctx)); + + mutex_unlock(&ctx->state_mutex); + + return 0; +} + +static int spufs_ctx_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_ctx_fops = { + .open = spufs_ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +const struct spufs_tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), }, + { "lslr", &spufs_lslr_ops, 0444, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, + { "decr", &spufs_decr_ops, 0666, }, + { "decr_status", &spufs_decr_status_ops, 0666, }, + { "event_mask", &spufs_event_mask_ops, 0666, }, + { "event_status", &spufs_event_status_ops, 0444, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), }, + { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), }, + { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), }, + { "dma_info", &spufs_dma_info_fops, 0444, + sizeof(struct spu_dma_info), }, + { "proxydma_info", &spufs_proxydma_info_fops, 0444, + sizeof(struct spu_proxydma_info)}, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + { "switch_log", &spufs_switch_log_fops, 0444 }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_debug_contents[] = { + { ".ctx", &spufs_ctx_fops, 0444, }, + {}, +}; + +const struct spufs_coredump_reader spufs_coredump_read[] = { + { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", __spufs_mem_read, NULL, LS_SIZE, }, + { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, + { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, + { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, + { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", __spufs_proxydma_info_read, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spufs/gang.c b/kernel/arch/powerpc/platforms/cell/spufs/gang.c new file mode 100644 index 000000000..71a443253 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/gang.c @@ -0,0 +1,87 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/list.h> +#include <linux/slab.h> + +#include "spufs.h" + +struct spu_gang *alloc_spu_gang(void) +{ + struct spu_gang *gang; + + gang = kzalloc(sizeof *gang, GFP_KERNEL); + if (!gang) + goto out; + + kref_init(&gang->kref); + mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); + INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); + +out: + return gang; +} + +static void destroy_spu_gang(struct kref *kref) +{ + struct spu_gang *gang; + gang = container_of(kref, struct spu_gang, kref); + WARN_ON(gang->contexts || !list_empty(&gang->list)); + kfree(gang); +} + +struct spu_gang *get_spu_gang(struct spu_gang *gang) +{ + kref_get(&gang->kref); + return gang; +} + +int put_spu_gang(struct spu_gang *gang) +{ + return kref_put(&gang->kref, &destroy_spu_gang); +} + +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + ctx->gang = get_spu_gang(gang); + list_add(&ctx->gang_list, &gang->list); + gang->contexts++; + mutex_unlock(&gang->mutex); +} + +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } + list_del_init(&ctx->gang_list); + gang->contexts--; + mutex_unlock(&gang->mutex); + + put_spu_gang(gang); +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c b/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c new file mode 100644 index 000000000..8655c4cbe --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -0,0 +1,349 @@ +/* hw_ops.c - query/set operations on active SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/poll.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> +#include "spufs.h" + +static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + u32 mbox_stat; + int ret = 0; + + spin_lock_irq(&spu->register_lock); + mbox_stat = in_be32(&prob->mb_stat_R); + if (mbox_stat & 0x0000ff) { + *data = in_be32(&prob->pu_mb_R); + ret = 4; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static u32 spu_hw_mbox_stat_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->mb_stat_R); +} + +static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, + unsigned int events) +{ + struct spu *spu = ctx->spu; + int ret = 0; + u32 stat; + + spin_lock_irq(&spu->register_lock); + stat = in_be32(&spu->problem->mb_stat_R); + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (POLLIN | POLLRDNORM)) { + if (stat & 0xff0000) + ret |= POLLIN | POLLRDNORM; + else { + spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + } + } + if (events & (POLLOUT | POLLWRNORM)) { + if (stat & 0x00ff00) + ret = POLLOUT | POLLWRNORM; + else { + spu_int_stat_clear(spu, 2, + CLASS2_MAILBOX_THRESHOLD_INTR); + spu_int_mask_or(spu, 2, + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + } + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0xff0000) { + /* read the first available word */ + *data = in_be64(&priv2->puint_mb_R); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0x00ff00) { + /* we have space to write wbox_data to */ + out_be32(&prob->spu_mb_W, data); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static void spu_hw_signal1_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify1, data); +} + +static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify2, data); +} + +static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 1; + else + tmp &= ~1; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal1_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0); +} + +static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 2; + else + tmp &= ~2; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal2_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0); +} + +static u32 spu_hw_npc_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_npc_RW); +} + +static void spu_hw_npc_write(struct spu_context *ctx, u32 val) +{ + out_be32(&ctx->spu->problem->spu_npc_RW, val); +} + +static u32 spu_hw_status_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_status_R); +} + +static char *spu_hw_get_ls(struct spu_context *ctx) +{ + return ctx->spu->local_store; +} + +static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val) +{ + out_be64(&ctx->spu->priv2->spu_privcntl_RW, val); +} + +static u32 spu_hw_runcntl_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_runcntl_RW); +} + +static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock_irq(&ctx->spu->register_lock); + if (val & SPU_RUNCNTL_ISOLATE) + spu_hw_privcntl_write(ctx, + SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK); + out_be32(&ctx->spu->problem->spu_runcntl_RW, val); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_runcntl_stop(struct spu_context *ctx) +{ + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); + while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_master_start(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static void spu_hw_master_stop(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) +{ + struct spu_problem __iomem *prob = ctx->spu->problem; + int ret; + + spin_lock_irq(&ctx->spu->register_lock); + ret = -EAGAIN; + if (in_be32(&prob->dma_querytype_RW)) + goto out; + ret = 0; + out_be32(&prob->dma_querymask_RW, mask); + out_be32(&prob->dma_querytype_RW, mode); +out: + spin_unlock_irq(&ctx->spu->register_lock); + return ret; +} + +static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx) +{ + return in_be32(&ctx->spu->problem->dma_tagstatus_R); +} + +static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->dma_qstatus_R); +} + +static int spu_hw_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + u32 status; + struct spu_problem __iomem *prob = ctx->spu->problem; + + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&prob->mfc_lsa_W, cmd->lsa); + out_be64(&prob->mfc_ea_W, cmd->ea); + out_be32(&prob->mfc_union_W.by32.mfc_size_tag32, + cmd->size << 16 | cmd->tag); + out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32, + cmd->class << 16 | cmd->cmd); + status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + spin_unlock_irq(&ctx->spu->register_lock); + + switch (status & 0xffff) { + case 0: + return 0; + case 2: + return -EAGAIN; + default: + return -EINVAL; + } +} + +static void spu_hw_restart_dma(struct spu_context *ctx) +{ + struct spu_priv2 __iomem *priv2 = ctx->spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + +struct spu_context_ops spu_hw_ops = { + .mbox_read = spu_hw_mbox_read, + .mbox_stat_read = spu_hw_mbox_stat_read, + .mbox_stat_poll = spu_hw_mbox_stat_poll, + .ibox_read = spu_hw_ibox_read, + .wbox_write = spu_hw_wbox_write, + .signal1_write = spu_hw_signal1_write, + .signal2_write = spu_hw_signal2_write, + .signal1_type_set = spu_hw_signal1_type_set, + .signal1_type_get = spu_hw_signal1_type_get, + .signal2_type_set = spu_hw_signal2_type_set, + .signal2_type_get = spu_hw_signal2_type_get, + .npc_read = spu_hw_npc_read, + .npc_write = spu_hw_npc_write, + .status_read = spu_hw_status_read, + .get_ls = spu_hw_get_ls, + .privcntl_write = spu_hw_privcntl_write, + .runcntl_read = spu_hw_runcntl_read, + .runcntl_write = spu_hw_runcntl_write, + .runcntl_stop = spu_hw_runcntl_stop, + .master_start = spu_hw_master_start, + .master_stop = spu_hw_master_stop, + .set_mfc_query = spu_hw_set_mfc_query, + .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, + .get_mfc_free_elements = spu_hw_get_mfc_free_elements, + .send_mfc_command = spu_hw_send_mfc_command, + .restart_dma = spu_hw_restart_dma, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spufs/inode.c b/kernel/arch/powerpc/platforms/cell/spufs/inode.c new file mode 100644 index 000000000..1ba6307be --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/inode.c @@ -0,0 +1,811 @@ + +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fsnotify.h> +#include <linux/backing-dev.h> +#include <linux/init.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/pagemap.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/parser.h> + +#include <asm/prom.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/uaccess.h> + +#include "spufs.h" + +struct spufs_sb_info { + int debug; +}; + +static struct kmem_cache *spufs_inode_cache; +char *isolated_loader; +static int isolated_loader_size; + +static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static struct inode * +spufs_alloc_inode(struct super_block *sb) +{ + struct spufs_inode_info *ei; + + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); + if (!ei) + return NULL; + + ei->i_gang = NULL; + ei->i_ctx = NULL; + ei->i_openers = 0; + + return &ei->vfs_inode; +} + +static void spufs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); +} + +static void spufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, spufs_i_callback); +} + +static void +spufs_init_once(void *p) +{ + struct spufs_inode_info *ei = p; + + inode_init_once(&ei->vfs_inode); +} + +static struct inode * +spufs_new_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +out: + return inode; +} + +static int +spufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = d_inode(dentry); + + if ((attr->ia_valid & ATTR_SIZE) && + (attr->ia_size != inode->i_size)) + return -EINVAL; + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; +} + + +static int +spufs_new_file(struct super_block *sb, struct dentry *dentry, + const struct file_operations *fops, umode_t mode, + size_t size, struct spu_context *ctx) +{ + static const struct inode_operations spufs_file_iops = { + .setattr = spufs_setattr, + }; + struct inode *inode; + int ret; + + ret = -ENOSPC; + inode = spufs_new_inode(sb, S_IFREG | mode); + if (!inode) + goto out; + + ret = 0; + inode->i_op = &spufs_file_iops; + inode->i_fop = fops; + inode->i_size = size; + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + d_add(dentry, inode); +out: + return ret; +} + +static void +spufs_evict_inode(struct inode *inode) +{ + struct spufs_inode_info *ei = SPUFS_I(inode); + clear_inode(inode); + if (ei->i_ctx) + put_spu_context(ei->i_ctx); + if (ei->i_gang) + put_spu_gang(ei->i_gang); +} + +static void spufs_prune_dir(struct dentry *dir) +{ + struct dentry *dentry, *tmp; + + mutex_lock(&d_inode(dir)->i_mutex); + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { + spin_lock(&dentry->d_lock); + if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + dget_dlock(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + simple_unlink(d_inode(dir), dentry); + /* XXX: what was dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ + dput(dentry); + } else { + spin_unlock(&dentry->d_lock); + } + } + shrink_dcache_parent(dir); + mutex_unlock(&d_inode(dir)->i_mutex); +} + +/* Caller must hold parent->i_mutex */ +static int spufs_rmdir(struct inode *parent, struct dentry *dir) +{ + /* remove all entries */ + int res; + spufs_prune_dir(dir); + d_drop(dir); + res = simple_rmdir(parent, dir); + /* We have to give up the mm_struct */ + spu_forget(SPUFS_I(d_inode(dir))->i_ctx); + return res; +} + +static int spufs_fill_dir(struct dentry *dir, + const struct spufs_tree_descr *files, umode_t mode, + struct spu_context *ctx) +{ + while (files->name && files->name[0]) { + int ret; + struct dentry *dentry = d_alloc_name(dir, files->name); + if (!dentry) + return -ENOMEM; + ret = spufs_new_file(dir->d_sb, dentry, files->ops, + files->mode & mode, files->size, ctx); + if (ret) + return ret; + files++; + } + return 0; +} + +static int spufs_dir_close(struct inode *inode, struct file *file) +{ + struct spu_context *ctx; + struct inode *parent; + struct dentry *dir; + int ret; + + dir = file->f_path.dentry; + parent = d_inode(dir->d_parent); + ctx = SPUFS_I(d_inode(dir))->i_ctx; + + mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT); + ret = spufs_rmdir(parent, dir); + mutex_unlock(&parent->i_mutex); + WARN_ON(ret); + + return dcache_dir_close(inode, file); +} + +const struct file_operations spufs_context_fops = { + .open = dcache_dir_open, + .release = spufs_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .iterate = dcache_readdir, + .fsync = noop_fsync, +}; +EXPORT_SYMBOL_GPL(spufs_context_fops); + +static int +spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, + umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_context *ctx; + + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + return -ENOSPC; + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */ + SPUFS_I(inode)->i_ctx = ctx; + if (!ctx) { + iput(inode); + return -ENOSPC; + } + + ctx->flags = flags; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + mutex_lock(&inode->i_mutex); + + dget(dentry); + inc_nlink(dir); + inc_nlink(inode); + + d_instantiate(dentry, inode); + + if (flags & SPU_CREATE_NOSCHED) + ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, + mode, ctx); + else + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + + if (!ret && spufs_get_sb_info(dir->i_sb)->debug) + ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, + mode, ctx); + + if (ret) + spufs_rmdir(dir, dentry); + + mutex_unlock(&inode->i_mutex); + + return ret; +} + +static int spufs_context_open(struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &spufs_context_fops; + fd_install(ret, filp); + return ret; +} + +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor, *err; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(file_inode(filp))->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + + if (gang != neighbor->gang) { + err = ERR_PTR(-EINVAL); + goto out_put_neighbor; + } + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + } + + return neighbor; + +out_put_neighbor: + put_spu_context(neighbor); + return err; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} + +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, umode_t mode, + struct file *aff_filp) +{ + int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; + + if ((flags & SPU_CREATE_NOSCHED) && + !capable(CAP_SYS_NICE)) + return -EPERM; + + if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) + == SPU_CREATE_ISOLATE) + return -EINVAL; + + if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) + return -ENODEV; + + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + if (!gang) + return -EINVAL; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } + + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); + if (ret) + goto out_aff_unlock; + + if (affinity) { + spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, + neighbor); + if (neighbor) + put_spu_context(neighbor); + } + + ret = spufs_context_open(&path); + if (ret < 0) + WARN_ON(spufs_rmdir(inode, dentry)); + +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); + return ret; +} + +static int +spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_gang *gang; + + ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + ret = 0; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + gang = alloc_spu_gang(); + SPUFS_I(inode)->i_ctx = NULL; + SPUFS_I(inode)->i_gang = gang; + if (!gang) + goto out_iput; + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + d_instantiate(dentry, inode); + inc_nlink(dir); + inc_nlink(d_inode(dentry)); + return ret; + +out_iput: + iput(inode); +out: + return ret; +} + +static int spufs_gang_open(struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + /* + * get references for dget and mntget, will be released + * in error path of *_open(). + */ + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &simple_dir_operations; + fd_install(ret, filp); + return ret; +} + +static int spufs_create_gang(struct inode *inode, + struct dentry *dentry, + struct vfsmount *mnt, umode_t mode) +{ + struct path path = {.mnt = mnt, .dentry = dentry}; + int ret; + + ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); + if (!ret) { + ret = spufs_gang_open(&path); + if (ret < 0) { + int err = simple_rmdir(inode, dentry); + WARN_ON(err); + } + } + return ret; +} + + +static struct file_system_type spufs_type; + +long spufs_create(struct path *path, struct dentry *dentry, + unsigned int flags, umode_t mode, struct file *filp) +{ + struct inode *dir = d_inode(path->dentry); + int ret; + + /* check if we are on spufs */ + if (path->dentry->d_sb->s_type != &spufs_type) + return -EINVAL; + + /* don't accept undefined flags */ + if (flags & (~SPU_CREATE_FLAG_ALL)) + return -EINVAL; + + /* only threads can be underneath a gang */ + if (path->dentry != path->dentry->d_sb->s_root) + if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang) + return -EINVAL; + + mode &= ~current_umask(); + + if (flags & SPU_CREATE_GANG) + ret = spufs_create_gang(dir, dentry, path->mnt, mode); + else + ret = spufs_create_context(dir, dentry, path->mnt, flags, mode, + filp); + if (ret >= 0) + fsnotify_mkdir(dir, dentry); + + return ret; +} + +/* File system initialization */ +enum { + Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, +}; + +static const match_table_t spufs_tokens = { + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_mode, "mode=%o" }, + { Opt_debug, "debug" }, + { Opt_err, NULL }, +}; + +static int +spufs_parse_options(struct super_block *sb, char *options, struct inode *root) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + while ((p = strsep(&options, ",")) != NULL) { + int token, option; + + if (!*p) + continue; + + token = match_token(p, spufs_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + root->i_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(root->i_uid)) + return 0; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + root->i_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(root->i_gid)) + return 0; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return 0; + root->i_mode = option | S_IFDIR; + break; + case Opt_debug: + spufs_get_sb_info(sb)->debug = 1; + break; + default: + return 0; + } + } + return 1; +} + +static void spufs_exit_isolated_loader(void) +{ + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); +} + +static void +spufs_init_isolated_loader(void) +{ + struct device_node *dn; + const char *loader; + int size; + + dn = of_find_node_by_path("/spu-isolation"); + if (!dn) + return; + + loader = of_get_property(dn, "loader", &size); + if (!loader) + return; + + /* the loader must be align on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); + if (!isolated_loader) + return; + + isolated_loader_size = size; + memcpy(isolated_loader, loader, size); + printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); +} + +static int +spufs_create_root(struct super_block *sb, void *data) +{ + struct inode *inode; + int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + + ret = -ENOMEM; + inode = spufs_new_inode(sb, S_IFDIR | 0775); + if (!inode) + goto out; + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + SPUFS_I(inode)->i_ctx = NULL; + inc_nlink(inode); + + ret = -EINVAL; + if (!spufs_parse_options(sb, data, inode)) + goto out_iput; + + ret = -ENOMEM; + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto out; + + return 0; +out_iput: + iput(inode); +out: + return ret; +} + +static int +spufs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct spufs_sb_info *info; + static const struct super_operations s_ops = { + .alloc_inode = spufs_alloc_inode, + .destroy_inode = spufs_destroy_inode, + .statfs = simple_statfs, + .evict_inode = spufs_evict_inode, + .show_options = generic_show_options, + }; + + save_mount_options(sb, data); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SPUFS_MAGIC; + sb->s_op = &s_ops; + sb->s_fs_info = info; + + return spufs_create_root(sb, data); +} + +static struct dentry * +spufs_mount(struct file_system_type *fstype, int flags, + const char *name, void *data) +{ + return mount_single(fstype, flags, data, spufs_fill_super); +} + +static struct file_system_type spufs_type = { + .owner = THIS_MODULE, + .name = "spufs", + .mount = spufs_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("spufs"); + +static int __init spufs_init(void) +{ + int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + + ret = -ENOMEM; + spufs_inode_cache = kmem_cache_create("spufs_inode_cache", + sizeof(struct spufs_inode_info), 0, + SLAB_HWCACHE_ALIGN, spufs_init_once); + + if (!spufs_inode_cache) + goto out; + ret = spu_sched_init(); + if (ret) + goto out_cache; + ret = register_spu_syscalls(&spufs_calls); + if (ret) + goto out_sched; + ret = register_filesystem(&spufs_type); + if (ret) + goto out_syscalls; + + spufs_init_isolated_loader(); + + return 0; + +out_syscalls: + unregister_spu_syscalls(&spufs_calls); +out_sched: + spu_sched_exit(); +out_cache: + kmem_cache_destroy(spufs_inode_cache); +out: + return ret; +} +module_init(spufs_init); + +static void __exit spufs_exit(void) +{ + spu_sched_exit(); + spufs_exit_isolated_loader(); + unregister_spu_syscalls(&spufs_calls); + unregister_filesystem(&spufs_type); + kmem_cache_destroy(spufs_inode_cache); +} +module_exit(spufs_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>"); + diff --git a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c new file mode 100644 index 000000000..147069938 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -0,0 +1,183 @@ +/* + * SPU local store allocation routines + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu.h> + +#include "spufs.h" + +static int spu_alloc_lscsa_std(struct spu_state *csa) +{ + struct spu_lscsa *lscsa; + unsigned char *p; + + lscsa = vzalloc(sizeof(struct spu_lscsa)); + if (!lscsa) + return -ENOMEM; + csa->lscsa = lscsa; + + /* Set LS pages reserved to allow for user-space mapping. */ + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + return 0; +} + +static void spu_free_lscsa_std(struct spu_state *csa) +{ + /* Clear reserved bit before vfree. */ + unsigned char *p; + + if (csa->lscsa == NULL) + return; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vfree(csa->lscsa); +} + +#ifdef CONFIG_SPU_FS_64K_LS + +#define SPU_64K_PAGE_SHIFT 16 +#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) +#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) + +int spu_alloc_lscsa(struct spu_state *csa) +{ + struct page **pgarray; + unsigned char *p; + int i, j, n_4k; + + /* Check availability of 64K pages */ + if (!spu_64k_pages_available()) + goto fail; + + csa->use_big_pages = 1; + + pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", + csa); + + /* First try to allocate our 64K pages. We need 5 of them + * with the current implementation. In the future, we should try + * to separate the lscsa with the actual local store image, thus + * allowing us to require only 4 64K pages per context + */ + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { + /* XXX This is likely to fail, we should use a special pool + * similar to what hugetlbfs does. + */ + csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, + SPU_64K_PAGE_ORDER); + if (csa->lscsa_pages[i] == NULL) + goto fail; + } + + pr_debug(" success ! creating vmap...\n"); + + /* Now we need to create a vmalloc mapping of these for the kernel + * and SPU context switch code to use. Currently, we stick to a + * normal kernel vmalloc mapping, which in our case will be 4K + */ + n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; + pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); + if (pgarray == NULL) + goto fail; + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + for (j = 0; j < SPU_64K_PAGE_COUNT; j++) + /* We assume all the struct page's are contiguous + * which should be hopefully the case for an order 4 + * allocation.. + */ + pgarray[i * SPU_64K_PAGE_COUNT + j] = + csa->lscsa_pages[i] + j; + csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); + kfree(pgarray); + if (csa->lscsa == NULL) + goto fail; + + memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); + + /* Set LS pages reserved to allow for user-space mapping. + * + * XXX isn't that a bit obsolete ? I think we should just + * make sure the page count is high enough. Anyway, won't harm + * for now + */ + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + pr_debug(" all good !\n"); + + return 0; +fail: + pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); + spu_free_lscsa(csa); + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + unsigned char *p; + int i; + + if (!csa->use_big_pages) { + spu_free_lscsa_std(csa); + return; + } + csa->use_big_pages = 0; + + if (csa->lscsa == NULL) + goto free_pages; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vunmap(csa->lscsa); + csa->lscsa = NULL; + + free_pages: + + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + if (csa->lscsa_pages[i]) + __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); +} + +#else /* CONFIG_SPU_FS_64K_LS */ + +int spu_alloc_lscsa(struct spu_state *csa) +{ + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + spu_free_lscsa_std(csa); +} + +#endif /* !defined(CONFIG_SPU_FS_64K_LS) */ diff --git a/kernel/arch/powerpc/platforms/cell/spufs/run.c b/kernel/arch/powerpc/platforms/cell/spufs/run.c new file mode 100644 index 000000000..4ddf769a6 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/run.c @@ -0,0 +1,454 @@ +#define DEBUG + +#include <linux/wait.h> +#include <linux/ptrace.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/io.h> +#include <asm/unistd.h> + +#include "spufs.h" + +/* interrupt-level stop callback function. */ +void spufs_stop_callback(struct spu *spu, int irq) +{ + struct spu_context *ctx = spu->ctx; + + /* + * It should be impossible to preempt a context while an exception + * is being processed, since the context switch code is specially + * coded to deal with interrupts ... But, just in case, sanity check + * the context pointer. It is OK to return doing nothing since + * the exception will be regenerated when the context is resumed. + */ + if (ctx) { + /* Copy exception arguments into module specific structure */ + switch(irq) { + case 0 : + ctx->csa.class_0_pending = spu->class_0_pending; + ctx->csa.class_0_dar = spu->class_0_dar; + break; + case 1 : + ctx->csa.class_1_dsisr = spu->class_1_dsisr; + ctx->csa.class_1_dar = spu->class_1_dar; + break; + case 2 : + break; + } + + /* ensure that the exception status has hit memory before a + * thread waiting on the context's stop queue is woken */ + smp_wmb(); + + wake_up_all(&ctx->stop_wq); + } +} + +int spu_stopped(struct spu_context *ctx, u32 *stat) +{ + u64 dsisr; + u32 stopped; + + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + +top: + *stat = ctx->ops->status_read(ctx); + if (*stat & stopped) { + /* + * If the spu hasn't finished stopping, we need to + * re-read the register to get the stopped value. + */ + if (*stat & SPU_STATUS_RUNNING) + goto top; + return 1; + } + + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; + + dsisr = ctx->csa.class_1_dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) + return 1; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; +} + +static int spu_setup_isolated(struct spu_context *ctx) +{ + int ret; + u64 __iomem *mfc_cntl; + u64 sr1; + u32 status; + unsigned long timeout; + const u32 status_loading = SPU_STATUS_RUNNING + | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; + + ret = -ENODEV; + if (!isolated_loader) + goto out; + + /* + * We need to exclude userspace access to the context. + * + * To protect against memory access we invalidate all ptes + * and make sure the pagefault handlers block on the mutex. + */ + spu_unmap_mappings(ctx); + + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; + + /* purge the MFC DMA queue to ensure no spurious accesses before we + * enter kernel mode */ + timeout = jiffies + HZ; + out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST); + while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK) + != MFC_CNTL_PURGE_DMA_COMPLETE) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", + __func__); + ret = -EIO; + goto out; + } + cond_resched(); + } + + /* clear purge status */ + out_be64(mfc_cntl, 0); + + /* put the SPE in kernel mode to allow access to the loader */ + sr1 = spu_mfc_sr1_get(ctx->spu); + sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + + /* start the loader */ + ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32); + ctx->ops->signal2_write(ctx, + (unsigned long)isolated_loader & 0xffffffff); + + ctx->ops->runcntl_write(ctx, + SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + + ret = 0; + timeout = jiffies + HZ; + while (((status = ctx->ops->status_read(ctx)) & status_loading) == + status_loading) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout waiting for loader\n", + __func__); + ret = -EIO; + goto out_drop_priv; + } + cond_resched(); + } + + if (!(status & SPU_STATUS_RUNNING)) { + /* If isolated LOAD has failed: run SPU, we will get a stop-and + * signal later. */ + pr_debug("%s: isolated LOAD failed\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -EACCES; + goto out_drop_priv; + } + + if (!(status & SPU_STATUS_ISOLATED_STATE)) { + /* This isn't allowed by the CBEA, but check anyway */ + pr_debug("%s: SPU fell out of isolated mode?\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); + ret = -EINVAL; + goto out_drop_priv; + } + +out_drop_priv: + /* Finished accessing the loader. Drop kernel mode */ + sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + +out: + return ret; +} + +static int spu_run_init(struct spu_context *ctx, u32 *npc) +{ + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; + int ret; + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } + } + + /* + * Apply special setup as required. + */ + if (ctx->flags & SPU_CREATE_ISOLATE) { + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { + ret = spu_setup_isolated(ctx); + if (ret) + return ret; + } + + /* + * If userspace has set the runcntrl register (eg, to + * issue an isolated exit), we need to re-set it here + */ + runcntl = ctx->ops->runcntl_read(ctx) & + (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + if (runcntl == 0) + runcntl = SPU_RUNCNTL_RUNNABLE; + } else { + unsigned long privcntl; + + if (test_thread_flag(TIF_SINGLESTEP)) + privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; + else + privcntl = SPU_PRIVCNTL_MODE_NORMAL; + + ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->npc_write(ctx, *npc); + } + + ctx->ops->runcntl_write(ctx, runcntl); + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } else { + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } + } + + set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + return 0; +} + +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) +{ + int ret = 0; + + spu_del_from_rq(ctx); + + *status = ctx->ops->status_read(ctx); + *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status); + spu_release(ctx); + + if (signal_pending(current)) + ret = -ERESTARTSYS; + + return ret; +} + +/* + * SPU syscall restarting is tricky because we violate the basic + * assumption that the signal handler is running on the interrupted + * thread. Here instead, the handler runs on PowerPC user space code, + * while the syscall was called from the SPU. + * This means we can only do a very rough approximation of POSIX + * signal semantics. + */ +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, + unsigned int *npc) +{ + int ret; + + switch (*spu_ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* + * Enter the regular syscall restarting for + * sys_spu_run, then restart the SPU syscall + * callback. + */ + *npc -= 8; + ret = -ERESTARTSYS; + break; + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * Restart block is too hard for now, just return -EINTR + * to the SPU. + * ERESTARTNOHAND comes from sys_pause, we also return + * -EINTR from there. + * Assume that we need to be restarted ourselves though. + */ + *spu_ret = -EINTR; + ret = -ERESTARTSYS; + break; + default: + printk(KERN_WARNING "%s: unexpected return code %ld\n", + __func__, *spu_ret); + ret = 0; + } + return ret; +} + +static int spu_process_callback(struct spu_context *ctx) +{ + struct spu_syscall_block s; + u32 ls_pointer, npc; + void __iomem *ls; + long spu_ret; + int ret; + + /* get syscall block from local store */ + npc = ctx->ops->npc_read(ctx) & ~3; + ls = (void __iomem *)ctx->ops->get_ls(ctx); + ls_pointer = in_be32(ls + npc); + if (ls_pointer > (LS_SIZE - sizeof(s))) + return -EFAULT; + memcpy_fromio(&s, ls + ls_pointer, sizeof(s)); + + /* do actual syscall without pinning the spu */ + ret = 0; + spu_ret = -ENOSYS; + npc += 4; + + if (s.nr_ret < __NR_syscalls) { + spu_release(ctx); + /* do actual system call from here */ + spu_ret = spu_sys_callback(&s); + if (spu_ret <= -ERESTARTSYS) { + ret = spu_handle_restartsys(ctx, &spu_ret, &npc); + } + mutex_lock(&ctx->state_mutex); + if (ret == -ERESTARTSYS) + return ret; + } + + /* need to re-get the ls, as it may have changed when we released the + * spu */ + ls = (void __iomem *)ctx->ops->get_ls(ctx); + + /* write result, jump over indirect pointer */ + memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret)); + ctx->ops->npc_write(ctx, npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + return ret; +} + +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) +{ + int ret; + struct spu *spu; + u32 status; + + if (mutex_lock_interruptible(&ctx->run_mutex)) + return -ERESTARTSYS; + + ctx->event_return = 0; + + ret = spu_acquire(ctx); + if (ret) + goto out_unlock; + + spu_enable_spu(ctx); + + spu_update_sched_info(ctx); + + ret = spu_run_init(ctx, npc); + if (ret) { + spu_release(ctx); + goto out; + } + + do { + ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); + if (unlikely(ret)) { + /* + * This is nasty: we need the state_mutex for all the + * bookkeeping even if the syscall was interrupted by + * a signal. ewww. + */ + mutex_lock(&ctx->state_mutex); + break; + } + spu = ctx->spu; + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { + ret = spu_process_callback(ctx); + if (ret) + break; + status &= ~SPU_STATUS_STOPPED_BY_STOP; + } + ret = spufs_handle_class1(ctx); + if (ret) + break; + + ret = spufs_handle_class0(ctx); + if (ret) + break; + + if (signal_pending(current)) + ret = -ERESTARTSYS; + } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); + + spu_disable_spu(ctx); + ret = spu_run_fini(ctx, npc, &status); + spu_yield(ctx); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) + ctx->stats.libassist++; + + if ((ret == 0) || + ((ret == -ERESTARTSYS) && + ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || + ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) + ret = status; + + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyawy + */ + if (unlikely(status & SPU_STATUS_SINGLE_STEP)) + ret = -ERESTARTSYS; + + else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP) + && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) { + force_sig(SIGTRAP, current); + ret = -ERESTARTSYS; + } + +out: + *event = ctx->event_return; +out_unlock: + mutex_unlock(&ctx->run_mutex); + return ret; +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/sched.c b/kernel/arch/powerpc/platforms/cell/spufs/sched.c new file mode 100644 index 000000000..998f632e7 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/sched.c @@ -0,0 +1,1172 @@ +/* sched.c - SPU scheduler. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * 2006-03-31 NUMA domains added. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/completion.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/numa.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/pid_namespace.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/spu_priv1.h> +#include "spufs.h" +#define CREATE_TRACE_POINTS +#include "sputrace.h" + +struct spu_prio_array { + DECLARE_BITMAP(bitmap, MAX_PRIO); + struct list_head runq[MAX_PRIO]; + spinlock_t runq_lock; + int nr_waiting; +}; + +static unsigned long spu_avenrun[3]; +static struct spu_prio_array *spu_prio; +static struct task_struct *spusched_task; +static struct timer_list spusched_timer; +static struct timer_list spuloadavg_timer; + +/* + * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). + */ +#define NORMAL_PRIO 120 + +/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. + */ +#define SPUSCHED_TICK (10) + +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is + * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. + */ +#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) +#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) + +#define SCALE_PRIO(x, prio) \ + max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) + +/* + * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: + * [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_TIMESLICE worth of execution time. + */ +void spu_set_timeslice(struct spu_context *ctx) +{ + if (ctx->prio < NORMAL_PRIO) + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); + else + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); +} + +/* + * Update scheduling information from the owning thread. + */ +void __spu_update_sched_info(struct spu_context *ctx) +{ + /* + * assert that the context is not on the runqueue, so it is safe + * to change its scheduling parameters. + */ + BUG_ON(!list_empty(&ctx->rq)); + + /* + * 32-Bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. + */ + ctx->tid = current->pid; + + /* + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case. + */ + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; + ctx->policy = current->policy; + + /* + * TO DO: the context may be loaded, so we may need to activate + * it again on a different node. But it shouldn't hurt anything + * to update its parameters, because we know that the scheduler + * is not actively looking at this field, since it is not on the + * runqueue. The context will be rescheduled on the proper node + * if it is timesliced or preempted. + */ + cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); + + /* Save the current cpu id for spu interrupt routing. */ + ctx->last_ran = raw_smp_processor_id(); +} + +void spu_update_sched_info(struct spu_context *ctx) +{ + int node; + + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). + */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); + } +} + +static int __node_allowed(struct spu_context *ctx, int node) +{ + if (nr_cpus_node(node)) { + const struct cpumask *mask = cpumask_of_node(node); + + if (cpumask_intersects(mask, &ctx->cpus_allowed)) + return 1; + } + + return 0; +} + +static int node_allowed(struct spu_context *ctx, int node) +{ + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; +} + +void do_notify_spus_active(void) +{ + int node; + + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(). + */ + for_each_online_node(node) { + struct spu *spu; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } +} + +/** + * spu_bind_context - bind spu context to physical spu + * @spu: physical spu to bind to + * @ctx: context to bind + */ +static void spu_bind_context(struct spu *spu, struct spu_context *ctx) +{ + spu_context_trace(spu_bind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + + spu_associate_mm(spu, ctx->owner); + + spin_lock_irq(&spu->register_lock); + spu->ctx = ctx; + spu->flags = 0; + ctx->spu = spu; + ctx->ops = &spu_hw_ops; + spu->pid = current->pid; + spu->tgid = current->tgid; + spu->ibox_callback = spufs_ibox_callback; + spu->wbox_callback = spufs_wbox_callback; + spu->stop_callback = spufs_stop_callback; + spu->mfc_callback = spufs_mfc_callback; + spin_unlock_irq(&spu->register_lock); + + spu_unmap_mappings(ctx); + + spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); + spu_restore(&ctx->csa, spu); + spu->timestamp = jiffies; + spu_switch_notify(spu, ctx); + ctx->state = SPU_STATE_RUNNABLE; + + spuctx_switch_state(ctx, SPU_UTIL_USER); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + /* + * "available_spus" counts how many spus are not potentially + * going to be used by other affinity gangs whose reference + * context is already in place. Although this code seeks to + * avoid having affinity gangs with a summed amount of + * contexts bigger than the amount of spus in the node, + * this may happen sporadically. In this case, available_spus + * becomes negative, which is harmless. + */ + int available_spus; + + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + available_spus = 0; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset + && spu->ctx->gang->aff_ref_spu) + available_spus -= spu->ctx->gang->contexts; + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, + lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. + */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + + return gang->aff_ref_spu != NULL; +} + +/** + * spu_unbind_context - unbind spu context from physical spu + * @spu: physical spu to unbind from + * @ctx: context to unbind + */ +static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) +{ + u32 status; + + spu_context_trace(spu_unbind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + + if (ctx->gang) + /* + * If ctx->gang->aff_sched_count is positive, SPU affinity is + * being considered in this gang. Using atomic_dec_if_positive + * allow us to skip an explicit check for affinity in this gang + */ + atomic_dec_if_positive(&ctx->gang->aff_sched_count); + + spu_switch_notify(spu, NULL); + spu_unmap_mappings(ctx); + spu_save(&ctx->csa, spu); + spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0); + + spin_lock_irq(&spu->register_lock); + spu->timestamp = jiffies; + ctx->state = SPU_STATE_SAVED; + spu->ibox_callback = NULL; + spu->wbox_callback = NULL; + spu->stop_callback = NULL; + spu->mfc_callback = NULL; + spu->pid = 0; + spu->tgid = 0; + ctx->ops = &spu_backing_ops; + spu->flags = 0; + spu->ctx = NULL; + spin_unlock_irq(&spu->register_lock); + + spu_associate_mm(spu, NULL); + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; + + if (spu_stopped(ctx, &status)) + wake_up_all(&ctx->stop_wq); +} + +/** + * spu_add_to_rq - add a context to the runqueue + * @ctx: context to add + */ +static void __spu_add_to_rq(struct spu_context *ctx) +{ + /* + * Unfortunately this code path can be called from multiple threads + * on behalf of a single context due to the way the problem state + * mmap support works. + * + * Fortunately we need to wake up all these threads at the same time + * and can simply skip the runqueue addition for every but the first + * thread getting into this codepath. + * + * It's still quite hacky, and long-term we should proxy all other + * threads through the owner thread so that spu_run is in control + * of all the scheduling activity for a given context. + */ + if (list_empty(&ctx->rq)) { + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } +} + +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + +static void __spu_del_from_rq(struct spu_context *ctx) +{ + int prio = ctx->prio; + + if (!list_empty(&ctx->rq)) { + if (!--spu_prio->nr_waiting) + del_timer(&spusched_timer); + list_del_init(&ctx->rq); + + if (list_empty(&spu_prio->runq[prio])) + clear_bit(prio, spu_prio->bitmap); + } +} + +void spu_del_from_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + +static void spu_prio_wait(struct spu_context *ctx) +{ + DEFINE_WAIT(wait); + + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. + */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + + spin_lock(&spu_prio->runq_lock); + prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); + if (!signal_pending(current)) { + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); + mutex_unlock(&ctx->state_mutex); + schedule(); + mutex_lock(&ctx->state_mutex); + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + } + spin_unlock(&spu_prio->runq_lock); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx->stop_wq, &wait); +} + +static struct spu *spu_get_idle(struct spu_context *ctx) +{ + struct spu *spu, *aff_ref_spu; + int node, n; + + spu_context_nospu_trace(spu_get_idle__enter, ctx); + + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + atomic_dec(&ctx->gang->aff_sched_count); + goto not_found; + } + mutex_unlock(&ctx->gang->aff_mutex); + } + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + + not_found: + spu_context_nospu_trace(spu_get_idle__not_found, ctx); + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_context_trace(spu_get_idle__found, ctx, spu); + spu_init_channels(spu); + return spu; +} + +/** + * find_victim - find a lower priority context to preempt + * @ctx: canidate context for running + * + * Returns the freed physical spu to run the new context on. + */ +static struct spu *find_victim(struct spu_context *ctx) +{ + struct spu_context *victim = NULL; + struct spu *spu; + int node, n; + + spu_context_nospu_trace(spu_find_victim__enter, ctx); + + /* + * Look for a possible preemption candidate on the local node first. + * If there is no candidate look at the other nodes. This isn't + * exactly fair, but so far the whole spu scheduler tries to keep + * a strong node affinity. We might want to fine-tune this in + * the future. + */ + restart: + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + struct spu_context *tmp = spu->ctx; + + if (tmp && tmp->prio > ctx->prio && + !(tmp->flags & SPU_CREATE_NOSCHED) && + (!victim || tmp->prio > victim->prio)) { + victim = spu->ctx; + } + } + if (victim) + get_spu_context(victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (victim) { + /* + * This nests ctx->state_mutex, but we always lock + * higher priority contexts before lower priority + * ones, so this is safe until we introduce + * priority inheritance schemes. + * + * XXX if the highest priority context is locked, + * this can loop a long time. Might be better to + * look at another context or give up after X retries. + */ + if (!mutex_trylock(&victim->state_mutex)) { + put_spu_context(victim); + victim = NULL; + goto restart; + } + + spu = victim->spu; + if (!spu || victim->prio <= ctx->prio) { + /* + * This race can happen because we've dropped + * the active list mutex. Not a problem, just + * restart the search. + */ + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + victim = NULL; + goto restart; + } + + spu_context_trace(__spu_deactivate__unload, ctx, spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + spu_unbind_context(spu, victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + victim->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags)) + spu_add_to_rq(victim); + + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + + return spu; + } + } + + return NULL; +} + +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); + + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); +} + +static void spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + /* not a candidate for interruptible because it's called either + from the scheduler thread or from spu_deactivate */ + mutex_lock(&ctx->state_mutex); + if (ctx->state == SPU_STATE_SAVED) + __spu_schedule(spu, ctx); + spu_release(ctx); +} + +/** + * spu_unschedule - remove a context from a spu, and possibly release it. + * @spu: The SPU to unschedule from + * @ctx: The context currently scheduled on the SPU + * @free_spu Whether to free the SPU for other contexts + * + * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the + * SPU is made available for other contexts (ie, may be returned by + * spu_get_idle). If this is zero, the caller is expected to schedule another + * context to this spu. + * + * Should be called with ctx->state_mutex held. + */ +static void spu_unschedule(struct spu *spu, struct spu_context *ctx, + int free_spu) +{ + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + if (free_spu) + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); +} + +/** + * spu_activate - find a free spu for a context and execute it + * @ctx: spu context to schedule + * @flags: flags (currently ignored) + * + * Tries to find a free spu to run @ctx. If no free spu is available + * add the context to the runqueue so it gets woken up once an spu + * is available. + */ +int spu_activate(struct spu_context *ctx, unsigned long flags) +{ + struct spu *spu; + + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; + +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; + + spu = spu_get_idle(ctx); + /* + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. + */ + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spu_prio_wait(ctx); + goto spu_activate_top; + } + + spu_add_to_rq(ctx); + + return 0; +} + +/** + * grab_runnable_context - try to find a runnable context + * + * Remove the highest priority context on the runqueue and return it + * to the caller. Returns %NULL if no runnable context was found. + */ +static struct spu_context *grab_runnable_context(int prio, int node) +{ + struct spu_context *ctx; + int best; + + spin_lock(&spu_prio->runq_lock); + best = find_first_bit(spu_prio->bitmap, prio); + while (best < prio) { + struct list_head *rq = &spu_prio->runq[best]; + + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here as well */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; + } + ctx = NULL; + found: + spin_unlock(&spu_prio->runq_lock); + return ctx; +} + +static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) +{ + struct spu *spu = ctx->spu; + struct spu_context *new = NULL; + + if (spu) { + new = grab_runnable_context(max_prio, spu->node); + if (new || force) { + spu_unschedule(spu, ctx, new == NULL); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + spu_release(ctx); + spu_schedule(spu, new); + /* this one can't easily be made + interruptible */ + mutex_lock(&ctx->state_mutex); + } + } + } + } + + return new != NULL; +} + +/** + * spu_deactivate - unbind a context from it's physical spu + * @ctx: spu context to unbind + * + * Unbind @ctx from the physical spu it is running on and schedule + * the highest priority context to run on the freed physical spu. + */ +void spu_deactivate(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_deactivate__enter, ctx); + __spu_deactivate(ctx, 1, MAX_PRIO); +} + +/** + * spu_yield - yield a physical spu if others are waiting + * @ctx: spu context to yield + * + * Check if there is a higher priority context waiting and if yes + * unbind @ctx from the physical spu and schedule the highest + * priority context to run on the freed physical spu instead. + */ +void spu_yield(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_yield__enter, ctx); + if (!(ctx->flags & SPU_CREATE_NOSCHED)) { + mutex_lock(&ctx->state_mutex); + __spu_deactivate(ctx, 0, MAX_PRIO); + mutex_unlock(&ctx->state_mutex); + } +} + +static noinline void spusched_tick(struct spu_context *ctx) +{ + struct spu_context *new = NULL; + struct spu *spu = NULL; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (ctx->flags & SPU_CREATE_NOSCHED) + goto out; + if (ctx->policy == SCHED_FIFO) + goto out; + + if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + goto out; + + spu = ctx->spu; + + spu_context_trace(spusched_tick__preempt, ctx, spu); + + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx, 0); + if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_add_to_rq(ctx); + } else { + spu_context_nospu_trace(spusched_tick__newslice, ctx); + if (!ctx->time_slice) + ctx->time_slice++; + } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); +} + +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / list_mutex here. Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. + */ +static unsigned long count_active_contexts(void) +{ + int nr_active = 0, node; + + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_waiting; + + return nr_active; +} + +/** + * spu_calc_load - update the avenrun load estimates. + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(void) +{ + unsigned long active_tasks; /* fixed-point */ + + active_tasks = count_active_contexts() * FIXED_1; + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); +} + +static void spusched_wake(unsigned long data) +{ + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); +} + +static void spuloadavg_wake(unsigned long data) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); +} + +static int spusched_thread(void *unused) +{ + struct spu *spu; + int node; + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + get_spu_context(ctx); + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + put_spu_context(ctx); + } + } + mutex_unlock(mtx); + } + } + + return 0; +} + +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct spu *spu; + enum spu_utilization_state old_state; + int node; + + curtime = ktime_get_ns(); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + node = spu->node; + if (old_state == SPU_UTIL_USER) + atomic_dec(&cbe_spu_info[node].busy_spus); + if (new_state == SPU_UTIL_USER) + atomic_inc(&cbe_spu_info[node].busy_spus); + } +} + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int show_spu_loadavg(struct seq_file *s, void *private) +{ + int a, b, c; + + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); + + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side...), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + task_active_pid_ns(current)->last_pid); + return 0; +} + +static int spu_loadavg_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_spu_loadavg, NULL); +} + +static const struct file_operations spu_loadavg_fops = { + .open = spu_loadavg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int __init spu_sched_init(void) +{ + struct proc_dir_entry *entry; + int err = -ENOMEM, i; + + spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); + if (!spu_prio) + goto out; + + for (i = 0; i < MAX_PRIO; i++) { + INIT_LIST_HEAD(&spu_prio->runq[i]); + __clear_bit(i, spu_prio->bitmap); + } + spin_lock_init(&spu_prio->runq_lock); + + setup_timer(&spusched_timer, spusched_wake, 0); + setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; + } + + mod_timer(&spuloadavg_timer, 0); + + entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); + if (!entry) + goto out_stop_kthread; + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); + return 0; + + out_stop_kthread: + kthread_stop(spusched_task); + out_free_spu_prio: + kfree(spu_prio); + out: + return err; +} + +void spu_sched_exit(void) +{ + struct spu *spu; + int node; + + remove_proc_entry("spu_loadavg", NULL); + + del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); + kthread_stop(spusched_task); + + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + kfree(spu_prio); +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c new file mode 100644 index 000000000..72c905f1e --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -0,0 +1,336 @@ +/* + * spu_restore.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context restore sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +#define BR_INSTR 0x327fff80 /* br -4 */ +#define NOP_INSTR 0x40200000 /* nop */ +#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */ +#define STOP_INSTR 0x00000000 /* stop 0x0 */ +#define ILLEGAL_INSTR 0x00800000 /* illegal instr */ +#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */ + +static inline void fetch_regs_from_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x40; /* GET */ + + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x44; /* GETL */ + + /* Restore, Step 4: + * Enqueue the GETL command (tag 0) to the MFC SPU command + * queue to transfer the upper 240 kb of LS from CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_decr(void) +{ + unsigned int offset; + unsigned int decr_running; + unsigned int decr; + + /* Restore, Step 6(moved): + * If the LSCSA "decrementer running" flag is set + * then write the SPU_WrDec channel with the + * decrementer value from LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr_status); + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; + if (decr_running) { + offset = LSCSA_QW_OFFSET(decr); + decr = regs_spill[offset].slot[0]; + spu_writech(SPU_WrDec, decr); + } +} + +static inline void write_ppu_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 11: + * Write the MFC_WrOut_MB channel with the PPU_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppu_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutMbox, data); +} + +static inline void write_ppuint_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 12: + * Write the MFC_WrInt_MB channel with the PPUINT_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppuint_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutIntrMbox, data); +} + +static inline void restore_fpcr(void) +{ + unsigned int offset; + vector unsigned int fpcr; + + /* Restore, Step 13: + * Restore the floating-point status and control + * register from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + fpcr = regs_spill[offset].v; + spu_mtfpscr(fpcr); +} + +static inline void restore_srr0(void) +{ + unsigned int offset; + unsigned int srr0; + + /* Restore, Step 14: + * Restore the SPU SRR0 data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + srr0 = regs_spill[offset].slot[0]; + spu_writech(SPU_WrSRR0, srr0); +} + +static inline void restore_event_mask(void) +{ + unsigned int offset; + unsigned int event_mask; + + /* Restore, Step 15: + * Restore the SPU_RdEventMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + event_mask = regs_spill[offset].slot[0]; + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void restore_tag_mask(void) +{ + unsigned int offset; + unsigned int tag_mask; + + /* Restore, Step 16: + * Restore the SPU_RdTagMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(tag_mask); + tag_mask = regs_spill[offset].slot[0]; + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void restore_complete(void) +{ + extern void exit_fini(void); + unsigned int *exit_instrs = (unsigned int *)exit_fini; + unsigned int offset; + unsigned int stopped_status; + unsigned int stopped_code; + + /* Restore, Step 18: + * Issue a stop-and-signal instruction with + * "good context restore" signal value. + * + * Restore, Step 19: + * There may be additional instructions placed + * here by the PPE Sequence for SPU Context + * Restore in order to restore the correct + * "stopped state". + * + * This step is handled here by analyzing the + * LSCSA.stopped_status and then modifying the + * exit() function to behave appropriately. + */ + + offset = LSCSA_QW_OFFSET(stopped_status); + stopped_status = regs_spill[offset].slot[0]; + stopped_code = regs_spill[offset].slot[1]; + + switch (stopped_status) { + case SPU_STOPPED_STATUS_P_I: + /* SPU_Status[P,I]=1. Add illegal instruction + * followed by stop-and-signal instruction after + * end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_P_H: + /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed + * by stop-and-signal instruction after end of + * restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_S_P: + /* SPU_Status[S,P]=1. Add nop instruction + * followed by 'br -4' after end of restore + * code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S_I: + /* SPU_Status[S,I]=1. Add illegal instruction + * followed by 'br -4' after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_I: + /* SPU_Status[I]=1. Add illegal instruction followed + * by infinite loop after end of restore sequence. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S: + /* SPU_Status[S]=1. Add two 'nop' instructions. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_H: + /* SPU_Status[H]=1. Add 'heq $0, $0' instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_P: + /* SPU_Status[P]=1. Add stop-and-signal instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_R: + /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + default: + /* SPU_Status[R]=1. No additional instructions. */ + break; + } + spu_sync(); +} + +/** + * main - entry point for SPU-side context restore. + * + * This code deviates from the documented sequence in the + * following aspects: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. The register spill area is pulled by SPU + * into LS, rather than pushed by PPE. + * 3. All 128 registers are restored by exit(). + * 4. The exit() function is modified at run + * time in order to properly restore the + * SPU_Status register. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + fetch_regs_from_mem(lscsa_ea); + + set_event_mask(); /* Step 1. */ + set_tag_mask(); /* Step 2. */ + build_dma_list(lscsa_ea); /* Step 3. */ + restore_upper_240kb(lscsa_ea); /* Step 4. */ + /* Step 5: done by 'exit'. */ + enqueue_putllc(lscsa_ea); /* Step 7. */ + set_tag_update(); /* Step 8. */ + read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ + read_llar_status(); /* Step 10. */ + write_ppu_mb(); /* Step 11. */ + write_ppuint_mb(); /* Step 12. */ + restore_fpcr(); /* Step 13. */ + restore_srr0(); /* Step 14. */ + restore_event_mask(); /* Step 15. */ + restore_tag_mask(); /* Step 16. */ + /* Step 17. done by 'exit'. */ + restore_complete(); /* Step 18. */ + + return 0; +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S new file mode 100644 index 000000000..2905949de --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S @@ -0,0 +1,116 @@ +/* + * crt0_r.S: Entry function for SPU-side context restore. + * + * Copyright (C) 2005 IBM + * + * Entry and exit function for SPU-side of the context restore + * sequence. Sets up an initial stack frame, then branches to + * 'main'. On return, restores all 128 registers from the LSCSA + * and exits. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/spu_csa.h> + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + +.global exit +.global _exit +exit: +_exit: + /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +restore_regs: + lqr $4, restore_reg_insts +restore_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +restore_reg_insts: /* must be quad-word aligned. */ + lqd $16, 0($3) + lqd $17, 16($3) + lqd $18, 32($3) + lqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, restore_reg_insts + ai $3, $3, 64 + brnz $5, restore_reg_loop + + /* SPU Context Restore Step 17: Restore the first 16 GPRs. */ + lqa $0, regs_spill + 0 + lqa $1, regs_spill + 16 + lqa $2, regs_spill + 32 + lqa $3, regs_spill + 48 + lqa $4, regs_spill + 64 + lqa $5, regs_spill + 80 + lqa $6, regs_spill + 96 + lqa $7, regs_spill + 112 + lqa $8, regs_spill + 128 + lqa $9, regs_spill + 144 + lqa $10, regs_spill + 160 + lqa $11, regs_spill + 176 + lqa $12, regs_spill + 192 + lqa $13, regs_spill + 208 + lqa $14, regs_spill + 224 + lqa $15, regs_spill + 240 + + /* Under normal circumstances, the 'exit' function + * terminates with 'stop SPU_RESTORE_COMPLETE', + * indicating that the SPU-side restore code has + * completed. + * + * However it is possible that instructions immediately + * following the 'stop 0x3ffc' have been modified at run + * time so as to recreate the exact SPU_Status settings + * from the application, e.g. illegal instruciton, halt, + * etc. + */ +.global exit_fini +.global _exit_fini +exit_fini: +_exit_fini: + stop SPU_RESTORE_COMPLETE + stop 0 + stop 0 + stop 0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped new file mode 100644 index 000000000..f383b027e --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -0,0 +1,935 @@ +/* + * spu_restore_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_restore.c. + * Do not edit! + */ +static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33001180, +0x42034003, +0x33800284, +0x1c010204, +0x40200000, +0x40200000, +0x40200000, +0x34000190, +0x34004191, +0x34008192, +0x3400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffa85, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, +0x00003ffc, +0x00000000, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb0a14103, +0x01a00204, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, +0x3fe30102, +0x21a00882, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x04000203, +0x21a00903, +0x40848002, +0x21a00982, +0x40800003, +0x21a00a03, +0x40802002, +0x21a00a82, +0x21a00083, +0x40800082, +0x21a00b02, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, +0x1cffc489, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, +0x40802202, +0x21a00a82, +0x21a00805, +0x21a00884, +0x3fbf0582, +0x3f200102, +0x3fbe0102, +0x3fe30102, +0x21a00902, +0x40804003, +0x21a00983, +0x21a00a05, +0x40805a02, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, +0x21a00e02, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, +0x21a00702, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, +0x21a00b02, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, +0x4020007f, +0x35000100, +0x00000470, +0x000002f8, +0x00000430, +0x00000360, +0x000002f8, +0x000003c8, +0x000004a8, +0x00000298, +0x00000360, +0x00200000, +0x409ffe02, +0x30801203, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, +0x21a00382, +0x4020007f, +0x327fde00, +0x409ffe02, +0x30801203, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38818282, +0x41004003, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, +0x41193f83, +0x60ffc003, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, +0x20801203, +0x3881c282, +0x41193f83, +0x60ffc003, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, +0x4020007f, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, +0x4020007f, +0x327fd580, +0x409ffe02, +0x1000658e, +0x40800206, +0x30801203, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38818282, +0x4020007f, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, +0x4020007f, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, +0x409ffe02, +0x1000588b, +0x40800208, +0x30801203, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, +0x00000000, +0x00000e00, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c b/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c new file mode 100644 index 000000000..ae95cc170 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -0,0 +1,195 @@ +/* + * spu_save.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context save sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include <spu_intrinsics.h> +#include <asm/spu_csa.h> +#include "spu_utils.h" + +static inline void save_event_mask(void) +{ + unsigned int offset; + + /* Save, Step 2: + * Read the SPU_RdEventMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); +} + +static inline void save_tag_mask(void) +{ + unsigned int offset; + + /* Save, Step 3: + * Read the SPU_RdTagMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(tag_mask); + regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask); +} + +static inline void save_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x24; /* PUTL */ + + /* Save, Step 7: + * Enqueue the PUTL command (tag 0) to the MFC SPU command + * queue to transfer the remaining 240 kb of LS to CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_fpcr(void) +{ + // vector unsigned int fpcr; + unsigned int offset; + + /* Save, Step 9: + * Issue the floating-point status and control register + * read instruction, and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + regs_spill[offset].v = spu_mffpscr(); +} + +static inline void save_decr(void) +{ + unsigned int offset; + + /* Save, Step 10: + * Read and save the SPU_RdDec channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr); + regs_spill[offset].slot[0] = spu_readch(SPU_RdDec); +} + +static inline void save_srr0(void) +{ + unsigned int offset; + + /* Save, Step 11: + * Read and save the SPU_WSRR0 channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0); +} + +static inline void spill_regs_to_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x20; /* PUT */ + + /* Save, Step 13: + * Enqueue a PUT command (tag 0) to send the LSCSA + * to the CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void enqueue_sync(addr64 lscsa_ea) +{ + unsigned int tag_id = 0; + unsigned int cmd = 0xCC; + + /* Save, Step 14: + * Enqueue an MFC_SYNC command (tag 0). + */ + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_complete(void) +{ + /* Save, Step 18: + * Issue a stop-and-signal instruction indicating + * "save complete". Note: This function will not + * return!! + */ + spu_stop(SPU_SAVE_COMPLETE); +} + +/** + * main - entry point for SPU-side context save. + * + * This code deviates from the documented sequence as follows: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. All 128 registers are saved by crt0.o. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + + /* Step 1: done by exit(). */ + save_event_mask(); /* Step 2. */ + save_tag_mask(); /* Step 3. */ + set_event_mask(); /* Step 4. */ + set_tag_mask(); /* Step 5. */ + build_dma_list(lscsa_ea); /* Step 6. */ + save_upper_240kb(lscsa_ea); /* Step 7. */ + /* Step 8: done by exit(). */ + save_fpcr(); /* Step 9. */ + save_decr(); /* Step 10. */ + save_srr0(); /* Step 11. */ + enqueue_putllc(lscsa_ea); /* Step 12. */ + spill_regs_to_mem(lscsa_ea); /* Step 13. */ + enqueue_sync(lscsa_ea); /* Step 14. */ + set_tag_update(); /* Step 15. */ + read_tag_status(); /* Step 16. */ + read_llar_status(); /* Step 17. */ + save_complete(); /* Step 18. */ + + return 0; +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S new file mode 100644 index 000000000..6659d6a66 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S @@ -0,0 +1,102 @@ +/* + * crt0_s.S: Entry function for SPU-side context save. + * + * Copyright (C) 2005 IBM + * + * Entry function for SPU-side of the context save sequence. + * Saves all 128 GPRs, sets up an initial stack frame, then + * branches to 'main'. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <asm/spu_csa.h> + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* SPU Context Save Step 1: Save the first 16 GPRs. */ + stqa $0, regs_spill + 0 + stqa $1, regs_spill + 16 + stqa $2, regs_spill + 32 + stqa $3, regs_spill + 48 + stqa $4, regs_spill + 64 + stqa $5, regs_spill + 80 + stqa $6, regs_spill + 96 + stqa $7, regs_spill + 112 + stqa $8, regs_spill + 128 + stqa $9, regs_spill + 144 + stqa $10, regs_spill + 160 + stqa $11, regs_spill + 176 + stqa $12, regs_spill + 192 + stqa $13, regs_spill + 208 + stqa $14, regs_spill + 224 + stqa $15, regs_spill + 240 + + /* SPU Context Save, Step 8: Save the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +save_regs: + lqr $4, save_reg_insts +save_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +save_reg_insts: /* must be quad-word aligned. */ + stqd $16, 0($3) + stqd $17, 16($3) + stqd $18, 32($3) + stqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, save_reg_insts + ai $3, $3, 64 + brnz $5, save_reg_loop + + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + + /* In this case main should not return; if it does + * there has been an error in the sequence. Execute + * stop-and-signal with code=0. + */ +.global exit +.global _exit +exit: +_exit: + stop 0x0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 + diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped new file mode 100644 index 000000000..b9f81ac8a --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped @@ -0,0 +1,743 @@ +/* + * spu_save_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_save.c. + * Do not edit! + */ +static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = { +0x20805000, +0x20805201, +0x20805402, +0x20805603, +0x20805804, +0x20805a05, +0x20805c06, +0x20805e07, +0x20806008, +0x20806209, +0x2080640a, +0x2080660b, +0x2080680c, +0x20806a0d, +0x20806c0e, +0x20806e0f, +0x4201c003, +0x33800184, +0x1c010204, +0x40200000, +0x24000190, +0x24004191, +0x24008192, +0x2400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffb85, +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33000180, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb1c38103, +0x01a00204, +0x3ec10082, +0x4201400d, +0xb1c38202, +0x01a00583, +0x34218682, +0x3ed80684, +0xb0408184, +0x24218682, +0x01a00603, +0x00200000, +0x34214682, +0x3ed40684, +0xb0408184, +0x40800003, +0x24214682, +0x21a00083, +0x40800082, +0x21a00b02, +0x4020007f, +0x1000251e, +0x42a00002, +0x32800008, +0x4205c00c, +0x00200000, +0x40a0000b, +0x3f82070f, +0x4080020a, +0x40800709, +0x3fe3078f, +0x3fbf0783, +0x3f200183, +0x3fbe0183, +0x3fe30187, +0x18008387, +0x4205c002, +0x3ac30404, +0x1cffc489, +0x00200000, +0x18008403, +0x38830402, +0x4cffc486, +0x3ac28185, +0xb0408584, +0x28830402, +0x1c020408, +0x38828182, +0xb0408385, +0x1802c387, +0x28828182, +0x217ff886, +0x04000582, +0x32800007, +0x21a00802, +0x3fbf0705, +0x3f200285, +0x3fbe0285, +0x3fe30285, +0x21a00885, +0x04000603, +0x21a00903, +0x40803c02, +0x21a00982, +0x04000386, +0x21a00a06, +0x40801202, +0x21a00a82, +0x73000003, +0x24200683, +0x01a00404, +0x00200000, +0x34204682, +0x3ec40683, +0xb0408203, +0x24204682, +0x01a00783, +0x00200000, +0x3421c682, +0x3edc0684, +0xb0408184, +0x2421c682, +0x21a00806, +0x21a00885, +0x3fbf0784, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x21a00904, +0x40804002, +0x21a00982, +0x21a00a06, +0x40805a02, +0x21a00a82, +0x04000683, +0x21a00803, +0x21a00885, +0x21a00904, +0x40848002, +0x21a00982, +0x21a00a06, +0x40801002, +0x21a00a82, +0x21a00a06, +0x40806602, +0x00200000, +0x35800009, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x01a00d83, +0x00003ffb, +0x40800003, +0x4020007f, +0x35000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h b/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h new file mode 100644 index 000000000..58359feb6 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h @@ -0,0 +1,160 @@ +/* + * utils.h: Utilities for SPU-side of the context switch operation. + * + * (C) Copyright IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _SPU_CONTEXT_UTILS_H_ +#define _SPU_CONTEXT_UTILS_H_ + +/* + * 64-bit safe EA. + */ +typedef union { + unsigned long long ull; + unsigned int ui[2]; +} addr64; + +/* + * 128-bit register template. + */ +typedef union { + unsigned int slot[4]; + vector unsigned int v; +} spu_reg128v; + +/* + * DMA list structure. + */ +struct dma_list_elem { + unsigned int size; + unsigned int ea_low; +}; + +/* + * Declare storage for 8-byte aligned DMA list. + */ +struct dma_list_elem dma_list[15] __attribute__ ((aligned(8))); + +/* + * External definition for storage + * declared in crt0. + */ +extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS]; + +/* + * Compute LSCSA byte offset for a given field. + */ +static struct spu_lscsa *dummy = (struct spu_lscsa *)0; +#define LSCSA_BYTE_OFFSET(_field) \ + ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0]))) +#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) + +static inline void set_event_mask(void) +{ + unsigned int event_mask = 0; + + /* Save, Step 4: + * Restore, Step 1: + * Set the SPU_RdEventMsk channel to zero to mask + * all events. + */ + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void set_tag_mask(void) +{ + unsigned int tag_mask = 1; + + /* Save, Step 5: + * Restore, Step 2: + * Set the SPU_WrTagMsk channel to '01' to unmask + * only tag group 0. + */ + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void build_dma_list(addr64 lscsa_ea) +{ + unsigned int ea_low; + int i; + + /* Save, Step 6: + * Restore, Step 3: + * Update the effective address for the CSA in the + * pre-canned DMA-list in local storage. + */ + ea_low = lscsa_ea.ui[1]; + ea_low += LSCSA_BYTE_OFFSET(ls[16384]); + + for (i = 0; i < 15; i++, ea_low += 16384) { + dma_list[i].size = 16384; + dma_list[i].ea_low = ea_low; + } +} + +static inline void enqueue_putllc(addr64 lscsa_ea) +{ + unsigned int ls = 0; + unsigned int size = 128; + unsigned int tag_id = 0; + unsigned int cmd = 0xB4; /* PUTLLC */ + + /* Save, Step 12: + * Restore, Step 7: + * Send a PUTLLC (tag 0) command to the MFC using + * an effective address in the CSA in order to + * remove any possible lock-line reservation. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void set_tag_update(void) +{ + unsigned int update_any = 1; + + /* Save, Step 15: + * Restore, Step 8: + * Write the MFC_TagUpdate channel with '01'. + */ + spu_writech(MFC_WrTagUpdate, update_any); +} + +static inline void read_tag_status(void) +{ + /* Save, Step 16: + * Restore, Step 9: + * Read the MFC_TagStat channel data. + */ + spu_readch(MFC_RdTagStat); +} + +static inline void read_llar_status(void) +{ + /* Save, Step 17: + * Restore, Step 10: + * Read the MFC_AtomicStat channel data. + */ + spu_readch(MFC_RdAtomicStat); +} + +#endif /* _SPU_CONTEXT_UTILS_H_ */ diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spufs.h b/kernel/arch/powerpc/platforms/cell/spufs/spufs.h new file mode 100644 index 000000000..bcfd6f063 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/spufs.h @@ -0,0 +1,376 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef SPUFS_H +#define SPUFS_H + +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cpumask.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/spu_info.h> + +#define SPUFS_PS_MAP_SIZE 0x20000 +#define SPUFS_MFC_MAP_SIZE 0x1000 +#define SPUFS_CNTL_MAP_SIZE 0x1000 +#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE +#define SPUFS_MSS_MAP_SIZE 0x1000 + +/* The magic number for our file system */ +enum { + SPUFS_MAGIC = 0x23c9b64e, +}; + +struct spu_context_ops; +struct spu_gang; + +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ + SPU_SCHED_SPU_RUN, /* context is within spu_run */ +}; + +enum { + SWITCH_LOG_BUFSIZE = 4096, +}; + +enum { + SWITCH_LOG_START, + SWITCH_LOG_STOP, + SWITCH_LOG_EXIT, +}; + +struct switch_log { + wait_queue_head_t wait; + unsigned long head; + unsigned long tail; + struct switch_log_entry { + struct timespec tstamp; + s32 spu_id; + u32 type; + u32 val; + u64 timebase; + } log[]; +}; + +struct spu_context { + struct spu *spu; /* pointer to a physical SPU */ + struct spu_state csa; /* SPU context save area. */ + spinlock_t mmio_lock; /* protects mmio access */ + struct address_space *local_store; /* local store mapping. */ + struct address_space *mfc; /* 'mfc' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ + struct address_space *mss; /* 'mss' area mappings. */ + struct address_space *psmap; /* 'psmap' area mappings. */ + struct mutex mapping_lock; + u64 object_id; /* user space pointer for oprofile */ + + enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; + struct mutex state_mutex; + struct mutex run_mutex; + + struct mm_struct *owner; + + struct kref kref; + wait_queue_head_t ibox_wq; + wait_queue_head_t wbox_wq; + wait_queue_head_t stop_wq; + wait_queue_head_t mfc_wq; + wait_queue_head_t run_wq; + struct fasync_struct *ibox_fasync; + struct fasync_struct *wbox_fasync; + struct fasync_struct *mfc_fasync; + u32 tagwait; + struct spu_context_ops *ops; + struct work_struct reap_work; + unsigned long flags; + unsigned long event_return; + + struct list_head gang_list; + struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); + + /* owner thread */ + pid_t tid; + + /* scheduler fields */ + struct list_head rq; + unsigned int time_slice; + unsigned long sched_flags; + cpumask_t cpus_allowed; + int policy; + int prio; + int last_ran; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; + + /* context switch log */ + struct switch_log *switch_log; + + struct list_head aff_list; + int aff_head; + int aff_offset; +}; + +struct spu_gang { + struct list_head list; + struct mutex mutex; + struct kref kref; + int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; +}; + +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define AFF_MERGED 2 + +struct mfc_dma_command { + int32_t pad; /* reserved */ + uint32_t lsa; /* local storage address */ + uint64_t ea; /* effective address */ + uint16_t size; /* transfer size */ + uint16_t tag; /* command tag */ + uint16_t class; /* class ID */ + uint16_t cmd; /* command opcode */ +}; + + +/* SPU context query/set operations. */ +struct spu_context_ops { + int (*mbox_read) (struct spu_context * ctx, u32 * data); + u32(*mbox_stat_read) (struct spu_context * ctx); + unsigned int (*mbox_stat_poll)(struct spu_context *ctx, + unsigned int events); + int (*ibox_read) (struct spu_context * ctx, u32 * data); + int (*wbox_write) (struct spu_context * ctx, u32 data); + u32(*signal1_read) (struct spu_context * ctx); + void (*signal1_write) (struct spu_context * ctx, u32 data); + u32(*signal2_read) (struct spu_context * ctx); + void (*signal2_write) (struct spu_context * ctx, u32 data); + void (*signal1_type_set) (struct spu_context * ctx, u64 val); + u64(*signal1_type_get) (struct spu_context * ctx); + void (*signal2_type_set) (struct spu_context * ctx, u64 val); + u64(*signal2_type_get) (struct spu_context * ctx); + u32(*npc_read) (struct spu_context * ctx); + void (*npc_write) (struct spu_context * ctx, u32 data); + u32(*status_read) (struct spu_context * ctx); + char*(*get_ls) (struct spu_context * ctx); + void (*privcntl_write) (struct spu_context *ctx, u64 data); + u32 (*runcntl_read) (struct spu_context * ctx); + void (*runcntl_write) (struct spu_context * ctx, u32 data); + void (*runcntl_stop) (struct spu_context * ctx); + void (*master_start) (struct spu_context * ctx); + void (*master_stop) (struct spu_context * ctx); + int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); + u32 (*read_mfc_tagstatus)(struct spu_context * ctx); + u32 (*get_mfc_free_elements)(struct spu_context *ctx); + int (*send_mfc_command)(struct spu_context * ctx, + struct mfc_dma_command * cmd); + void (*dma_info_read) (struct spu_context * ctx, + struct spu_dma_info * info); + void (*proxydma_info_read) (struct spu_context * ctx, + struct spu_proxydma_info * info); + void (*restart_dma)(struct spu_context *ctx); +}; + +extern struct spu_context_ops spu_hw_ops; +extern struct spu_context_ops spu_backing_ops; + +struct spufs_inode_info { + struct spu_context *i_ctx; + struct spu_gang *i_gang; + struct inode vfs_inode; + int i_openers; +}; +#define SPUFS_I(inode) \ + container_of(inode, struct spufs_inode_info, vfs_inode) + +struct spufs_tree_descr { + const char *name; + const struct file_operations *ops; + umode_t mode; + size_t size; +}; + +extern const struct spufs_tree_descr spufs_dir_contents[]; +extern const struct spufs_tree_descr spufs_dir_nosched_contents[]; +extern const struct spufs_tree_descr spufs_dir_debug_contents[]; + +/* system call implementation */ +extern struct spufs_calls spufs_calls; +struct coredump_params; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags, + umode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm); + +extern const struct file_operations spufs_context_fops; + +/* gang management */ +struct spu_gang *alloc_spu_gang(void); +struct spu_gang *get_spu_gang(struct spu_gang *gang); +int put_spu_gang(struct spu_gang *gang); +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx); +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); + +/* fault handling */ +int spufs_handle_class1(struct spu_context *ctx); +int spufs_handle_class0(struct spu_context *ctx); + +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); + +/* context management */ +extern atomic_t nr_spu_contexts; +static inline int __must_check spu_acquire(struct spu_context *ctx) +{ + return mutex_lock_interruptible(&ctx->state_mutex); +} + +static inline void spu_release(struct spu_context *ctx) +{ + mutex_unlock(&ctx->state_mutex); +} + +struct spu_context * alloc_spu_context(struct spu_gang *gang); +void destroy_spu_context(struct kref *kref); +struct spu_context * get_spu_context(struct spu_context *ctx); +int put_spu_context(struct spu_context *ctx); +void spu_unmap_mappings(struct spu_context *ctx); + +void spu_forget(struct spu_context *ctx); +int __must_check spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); + +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); +int spu_activate(struct spu_context *ctx, unsigned long flags); +void spu_deactivate(struct spu_context *ctx); +void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val); +void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); +int __init spu_sched_init(void); +void spu_sched_exit(void); + +extern char *isolated_loader; + +/* + * spufs_wait + * Same as wait_event_interruptible(), except that here + * we need to call spu_release(ctx) before sleeping, and + * then spu_acquire(ctx) when awoken. + * + * Returns with state_mutex re-acquired when successful or + * with -ERESTARTSYS and the state_mutex dropped when interrupted. + */ + +#define spufs_wait(wq, condition) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + for (;;) { \ + prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + spu_release(ctx); \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + schedule(); \ + __ret = spu_acquire(ctx); \ + if (__ret) \ + break; \ + } \ + finish_wait(&(wq), &__wait); \ + __ret; \ +}) + +size_t spu_wbox_write(struct spu_context *ctx, u32 data); +size_t spu_ibox_read(struct spu_context *ctx, u32 *data); + +/* irq callback funcs. */ +void spufs_ibox_callback(struct spu *spu); +void spufs_wbox_callback(struct spu *spu); +void spufs_stop_callback(struct spu *spu, int irq); +void spufs_mfc_callback(struct spu *spu); +void spufs_dma_callback(struct spu *spu, int type); + +extern struct spu_coredump_calls spufs_coredump_calls; +struct spufs_coredump_reader { + char *name; + ssize_t (*read)(struct spu_context *ctx, + char __user *buffer, size_t size, loff_t *pos); + u64 (*get)(struct spu_context *ctx); + size_t size; +}; +extern const struct spufs_coredump_reader spufs_coredump_read[]; +extern int spufs_coredump_num_notes; + +extern int spu_init_csa(struct spu_state *csa); +extern void spu_fini_csa(struct spu_state *csa); +extern int spu_save(struct spu_state *prev, struct spu *spu); +extern int spu_restore(struct spu_state *new, struct spu *spu); +extern int spu_switch(struct spu_state *prev, struct spu_state *new, + struct spu *spu); +extern int spu_alloc_lscsa(struct spu_state *csa); +extern void spu_free_lscsa(struct spu_state *csa); + +extern void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state); + +#endif diff --git a/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h b/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h new file mode 100644 index 000000000..db2656aa4 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -0,0 +1,39 @@ +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SPUFS_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM spufs + +TRACE_EVENT(spufs_context, + TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name), + TP_ARGS(ctx, spu, name), + + TP_STRUCT__entry( + __field(const char *, name) + __field(int, owner_tid) + __field(int, number) + ), + + TP_fast_assign( + __entry->name = name; + __entry->owner_tid = ctx->tid; + __entry->number = spu ? spu->number : -1; + ), + + TP_printk("%s (ctxthread = %d, spu = %d)", + __entry->name, __entry->owner_tid, __entry->number) +); + +#define spu_context_trace(name, ctx, spu) \ + trace_spufs_context(ctx, spu, __stringify(name)) +#define spu_context_nospu_trace(name, ctx) \ + trace_spufs_context(ctx, NULL, __stringify(name)) + +#endif /* _TRACE_SPUFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE sputrace +#include <trace/define_trace.h> diff --git a/kernel/arch/powerpc/platforms/cell/spufs/switch.c b/kernel/arch/powerpc/platforms/cell/spufs/switch.c new file mode 100644 index 000000000..dde35551e --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/switch.c @@ -0,0 +1,2222 @@ +/* + * spu_switch.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author: Mark Nutter <mnutter@us.ibm.com> + * + * Host-side part of SPU context switch sequence outlined in + * Synergistic Processor Element, Book IV. + * + * A fully premptive switch of an SPE is very expensive in terms + * of time and system resources. SPE Book IV indicates that SPE + * allocation should follow a "serially reusable device" model, + * in which the SPE is assigned a task until it completes. When + * this is not possible, this sequence may be used to premptively + * save, and then later (optionally) restore the context of a + * program executing on an SPE. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/io.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/mmu_context.h> + +#include "spufs.h" + +#include "spu_save_dump.h" +#include "spu_restore_dump.h" + +#if 0 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + } while (_c); \ + } +#else +#define RELAX_SPIN_COUNT 1000 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + int _i; \ + for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \ + cpu_relax(); \ + } \ + if (unlikely(_c)) yield(); \ + else break; \ + } while (_c); \ + } +#endif /* debug */ + +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) + +static inline void acquire_spu_lock(struct spu *spu) +{ + /* Save, Step 1: + * Restore, Step 1: + * Acquire SPU-specific mutual exclusion lock. + * TBD. + */ +} + +static inline void release_spu_lock(struct spu *spu) +{ + /* Restore, Step 76: + * Release SPU-specific mutual exclusion lock. + * TBD. + */ +} + +static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 isolate_state; + + /* Save, Step 2: + * Save, Step 6: + * If SPU_Status[E,L,IS] any field is '1', this + * SPU is in isolate state and cannot be context + * saved at this time. + */ + isolate_state = SPU_STATUS_ISOLATED_STATE | + SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; + return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; +} + +static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 3: + * Restore, Step 2: + * Save INT_Mask_class0 in CSA. + * Write INT_MASK_class0 with value of 0. + * Save INT_Mask_class1 in CSA. + * Write INT_MASK_class1 with value of 0. + * Save INT_Mask_class2 in CSA. + * Write INT_MASK_class2 with value of 0. + * Synchronize all three interrupts to be sure + * we no longer execute a handler on another CPU. + */ + spin_lock_irq(&spu->register_lock); + if (csa) { + csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0); + csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1); + csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2); + } + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + eieio(); + spin_unlock_irq(&spu->register_lock); + + /* + * This flag needs to be set before calling synchronize_irq so + * that the update will be visible to the relevant handlers + * via a simple load. + */ + set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + synchronize_irq(spu->irqs[0]); + synchronize_irq(spu->irqs[1]); + synchronize_irq(spu->irqs[2]); +} + +static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 4: + * Restore, Step 25. + * Set a software watchdog timer, which specifies the + * maximum allowable time for a context save sequence. + * + * For present, this implementation will not set a global + * watchdog timer, as virtualization & variable system load + * may cause unpredictable execution times. + */ +} + +static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 5: + * Restore, Step 3: + * Inhibit user-space access (if provided) to this + * SPU by unmapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void set_switch_pending(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 7: + * Restore, Step 5: + * Set a software context switch pending flag. + * Done above in Step 3 - disable_interrupts(). + */ +} + +static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 8: + * Suspend DMA and save MFC_CNTL. + */ + switch (in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) { + case MFC_CNTL_SUSPEND_IN_PROGRESS: + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + /* fall through */ + case MFC_CNTL_SUSPEND_COMPLETE: + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_DMA_QUEUE; + break; + case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) & + ~MFC_CNTL_SUSPEND_DMA_QUEUE & + ~MFC_CNTL_SUSPEND_MASK; + break; + } +} + +static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 9: + * Save SPU_Runcntl in the CSA. This value contains + * the "Application Desired State". + */ + csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW); +} + +static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 10: + * Save MFC_SR1 in the CSA. + */ + csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu); +} + +static inline void save_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 11: + * Read SPU_Status[R], and save to CSA. + */ + if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) { + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } else { + u32 stopped; + + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + stopped = + SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if ((in_be32(&prob->spu_status_R) & stopped) == 0) + csa->prob.spu_status_R = SPU_STATUS_RUNNING; + else + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } +} + +static inline void save_mfc_stopped_status(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + const u64 mask = MFC_CNTL_DECREMENTER_RUNNING | + MFC_CNTL_DMA_QUEUES_EMPTY; + + /* Save, Step 12: + * Read MFC_CNTL[Ds]. Update saved copy of + * CSA.MFC_CNTL[Ds]. + * + * update: do the same with MFC_CNTL[Q]. + */ + csa->priv2.mfc_control_RW &= ~mask; + csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask; +} + +static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 13: + * Write MFC_CNTL[Dh] set to a '1' to halt + * the decrementer. + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void save_timebase(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 14: + * Read PPE Timebase High and Timebase low registers + * and save in CSA. TBD. + */ + csa->suspend_time = get_cycles(); +} + +static inline void remove_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 15: + * Remove other SPU access to this SPU by unmapping + * this SPU's pages from their address space. TBD. + */ +} + +static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 16: + * Restore, Step 11. + * Write SPU_MSSync register. Poll SPU_MSSync[P] + * for a value of 0. + */ + out_be64(&prob->spc_mssync_RW, 1UL); + POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING); +} + +static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 17: + * Restore, Step 12. + * Restore, Step 48. + * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register. + * Then issue a PPE sync instruction. + */ + spu_tlb_invalidate(spu); + mb(); +} + +static inline void handle_pending_interrupts(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 18: + * Handle any pending interrupts from this SPU + * here. This is OS or hypervisor specific. One + * option is to re-enable interrupts to handle any + * pending interrupts, with the interrupt handlers + * recognizing the software Context Switch Pending + * flag, to ensure the SPU execution or MFC command + * queue is not restarted. TBD. + */ +} + +static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 19: + * If MFC_Cntl[Se]=0 then save + * MFC command queues. + */ + if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) { + for (i = 0; i < 8; i++) { + csa->priv2.puq[i].mfc_cq_data0_RW = + in_be64(&priv2->puq[i].mfc_cq_data0_RW); + csa->priv2.puq[i].mfc_cq_data1_RW = + in_be64(&priv2->puq[i].mfc_cq_data1_RW); + csa->priv2.puq[i].mfc_cq_data2_RW = + in_be64(&priv2->puq[i].mfc_cq_data2_RW); + csa->priv2.puq[i].mfc_cq_data3_RW = + in_be64(&priv2->puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + csa->priv2.spuq[i].mfc_cq_data0_RW = + in_be64(&priv2->spuq[i].mfc_cq_data0_RW); + csa->priv2.spuq[i].mfc_cq_data1_RW = + in_be64(&priv2->spuq[i].mfc_cq_data1_RW); + csa->priv2.spuq[i].mfc_cq_data2_RW = + in_be64(&priv2->spuq[i].mfc_cq_data2_RW); + csa->priv2.spuq[i].mfc_cq_data3_RW = + in_be64(&priv2->spuq[i].mfc_cq_data3_RW); + } + } +} + +static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 20: + * Save the PPU_QueryMask register + * in the CSA. + */ + csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW); +} + +static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 21: + * Save the PPU_QueryType register + * in the CSA. + */ + csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); +} + +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R, however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + +static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 22: + * Save the MFC_CSR_TSQ register + * in the LSCSA. + */ + csa->priv2.spu_tag_status_query_RW = + in_be64(&priv2->spu_tag_status_query_RW); +} + +static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 23: + * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers in the CSA. + */ + csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW); + csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW); +} + +static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 24: + * Save the MFC_CSR_ATO register in + * the CSA. + */ + csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW); +} + +static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 25: + * Save the MFC_TCLASS_ID register in + * the CSA. + */ + csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu); +} + +static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 26: + * Restore, Step 23. + * Write the MFC_TCLASS_ID register with + * the value 0x10000000. + */ + spu_mfc_tclass_id_set(spu, 0x10000000); + eieio(); +} + +static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 27: + * Restore, Step 14. + * Write MFC_CNTL[Pc]=1 (purge queue). + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_PURGE_DMA_REQUEST | + MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 28: + * Poll MFC_CNTL[Ps] until value '11' is read + * (purge complete). + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_PURGE_DMA_STATUS_MASK) == + MFC_CNTL_PURGE_DMA_COMPLETE); +} + +static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 30: + * Restore, Step 18: + * Write MFC_SR1 with MFC_SR1[D=0,S=1] and + * MFC_SR1[TL,R,Pr,T] set correctly for the + * OS specific environment. + * + * Implementation note: The SPU-side code + * for save/restore is privileged, so the + * MFC_SR1[Pr] bit is not set. + * + */ + spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_RELOCATE_MASK | + MFC_STATE1_BUS_TLBIE_MASK)); +} + +static inline void save_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 31: + * Save SPU_NPC in the CSA. + */ + csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW); +} + +static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 32: + * Save SPU_PrivCntl in the CSA. + */ + csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW); +} + +static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 33: + * Restore, Step 16: + * Write SPU_PrivCntl[S,Le,A] fields reset to 0. + */ + out_be64(&priv2->spu_privcntl_RW, 0UL); + eieio(); +} + +static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 34: + * Save SPU_LSLR in the CSA. + */ + csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW); +} + +static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 35: + * Restore, Step 17. + * Reset SPU_LSLR. + */ + out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK); + eieio(); +} + +static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 36: + * Save SPU_Cfg in the CSA. + */ + csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW); +} + +static inline void save_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 37: + * Save PM_Trace_Tag_Wait_Mask in the CSA. + * Not performed by this implementation. + */ +} + +static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 38: + * Save RA_GROUP_ID register and the + * RA_ENABLE reigster in the CSA. + */ + csa->priv1.resource_allocation_groupID_RW = + spu_resource_allocation_groupID_get(spu); + csa->priv1.resource_allocation_enable_RW = + spu_resource_allocation_enable_get(spu); +} + +static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 39: + * Save MB_Stat register in the CSA. + */ + csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R); +} + +static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 40: + * Save the PPU_MB register in the CSA. + */ + csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R); +} + +static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 41: + * Save the PPUINT_MB register in the CSA. + */ + csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R); +} + +static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Save, Step 42: + */ + + /* Save CH 1, without channel count */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); + + /* Save the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW); + csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void save_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 43: + * Save SPU Read Mailbox Channel. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW); + for (i = 0; i < 4; i++) { + csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW); + } + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); +} + +static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 44: + * Save MFC_CMD Channel. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 21UL); + eieio(); + csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW); + eieio(); +} + +static inline void reset_ch(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL }; + u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL }; + u64 idx; + int i; + + /* Save, Step 45: + * Reset the following CH: [21, 23, 28, 30] + */ + for (i = 0; i < 4; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 46: + * Restore, Step 25. + * Write MFC_CNTL[Sc]=0 (resume queue processing). + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); +} + +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu, + unsigned int *code, int code_size) +{ + /* Save, Step 47: + * Restore, Step 30. + * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All + * register, then initialize SLB_VSID and SLB_ESID + * to provide access to SPU context save code and + * LSCSA. + * + * This implementation places both the context + * switch code and LSCSA in kernel address space. + * + * Further this implementation assumes that the + * MFC_SR1[R]=1 (in other words, assume that + * translation is desired by OS environment). + */ + spu_invalidate_slbs(spu); + spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size); +} + +static inline void set_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 48: + * Restore, Step 23. + * Change the software context switch pending flag + * to context switch active. This implementation does + * not uses a switch active flag. + * + * Now that we have saved the mfc in the csa, we can add in the + * restart command if an exception occurred. + */ + if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags)) + csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; + clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + mb(); +} + +static inline void enable_interrupts(struct spu_state *csa, struct spu *spu) +{ + unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + + /* Save, Step 49: + * Restore, Step 22: + * Reset and then enable interrupts, as + * needed by OS. + * + * This implementation enables only class1 + * (translation) interrupts. + */ + spin_lock_irq(&spu->register_lock); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, class1_mask); + spu_int_mask_set(spu, 2, 0ul); + spin_unlock_irq(&spu->register_lock); +} + +static inline int send_mfc_dma(struct spu *spu, unsigned long ea, + unsigned int ls_offset, unsigned int size, + unsigned int tag, unsigned int rclass, + unsigned int cmd) +{ + struct spu_problem __iomem *prob = spu->problem; + union mfc_tag_size_class_cmd command; + unsigned int transfer_size; + volatile unsigned int status = 0x0; + + while (size > 0) { + transfer_size = + (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size; + command.u.mfc_size = transfer_size; + command.u.mfc_tag = tag; + command.u.mfc_rclassid = rclass; + command.u.mfc_cmd = cmd; + do { + out_be32(&prob->mfc_lsa_W, ls_offset); + out_be64(&prob->mfc_ea_W, ea); + out_be64(&prob->mfc_union_W.all64, command.all64); + status = + in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + if (unlikely(status & 0x2)) { + cpu_relax(); + } + } while (status & 0x3); + size -= transfer_size; + ea += transfer_size; + ls_offset += transfer_size; + } + return 0; +} + +static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_PUT_CMD; + + /* Save, Step 50: + * Issue a DMA command to copy the first 16K bytes + * of local storage to the CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 51: + * Restore, Step 31. + * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry + * point address of context save code in local + * storage. + * + * This implementation uses SPU-side save/restore + * programs with entry points at LSA of 0. + */ + out_be32(&prob->spu_npc_RW, 0); + eieio(); +} + +static inline void set_signot1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 52: + * Restore, Step 32: + * Write SPU_Sig_Notify_1 register with upper 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify1, addr64.ui[0]); + eieio(); +} + +static inline void set_signot2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 53: + * Restore, Step 33: + * Write SPU_Sig_Notify_2 register with lower 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify2, addr64.ui[1]); + eieio(); +} + +static inline void send_save_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_save_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_save_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Save, Step 54: + * Issue a DMA command to copy context save code + * to local storage and start SPU. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 55: + * Restore, Step 38. + * Write PPU_QueryMask=1 (enable Tag Group 0) + * and issue eieio instruction. + */ + out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0)); + eieio(); +} + +static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask = MFC_TAGID_TO_TAGMASK(0); + unsigned long flags; + + /* Save, Step 56: + * Restore, Step 39. + * Restore, Step 39. + * Restore, Step 46. + * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete) + * or write PPU_QueryType[TS]=01 and wait for Tag Group + * Complete Interrupt. Write INT_Stat_Class0 or + * INT_Stat_Class2 with value of 'handled'. + */ + POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + local_irq_restore(flags); +} + +static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + unsigned long flags; + + /* Save, Step 57: + * Restore, Step 40. + * Poll until SPU_Status[R]=0 or wait for SPU Class 0 + * or SPU Class 2 interrupt. Write INT_Stat_class0 + * or INT_Stat_class2 with value of handled. + */ + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + local_irq_restore(flags); +} + +static inline int check_save_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Save, Step 54: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context save succeeded, otherwise context save + * failed. + */ + complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 4: + * If required, notify the "using application" that + * the SPU task has been terminated. TBD. + */ +} + +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 7: + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend + * the queue and halt the decrementer. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | + MFC_CNTL_DECREMENTER_HALTED); + eieio(); +} + +static inline void wait_suspend_mfc_complete(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 8: + * Restore, Step 47. + * Poll MFC_CNTL[Ss] until 11 is returned. + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); +} + +static inline int suspend_spe(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 9: + * If SPU_Status[R]=1, stop SPU execution + * and wait for stop to complete. + * + * Returns 1 if SPU_Status[R]=1 on entry. + * 0 otherwise + */ + if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_WAITING_FOR_CHANNEL) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + return 1; + } + return 0; +} + +static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 10: + * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1, + * release SPU from isolate state. + */ + if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + } +} + +static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx; + int i; + + /* Restore, Step 20: + */ + + /* Reset CH 1 */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + out_be64(&priv2->spu_chnldata_RW, 0UL); + + /* Reset the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL }; + u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 21: + * Reset the following CH: [21, 23, 28, 29, 30] + */ + for (i = 0; i < 5; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void setup_spu_status_part1(struct spu_state *csa, + struct spu *spu) +{ + u32 status_P = SPU_STATUS_STOPPED_BY_STOP; + u32 status_I = SPU_STATUS_INVALID_INSTR; + u32 status_H = SPU_STATUS_STOPPED_BY_HALT; + u32 status_S = SPU_STATUS_SINGLE_STEP; + u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR; + u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR; + u32 status_code; + + /* Restore, Step 27: + * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct + * instruction sequence to the end of the SPU based restore + * code (after the "context restored" stop and signal) to + * restore the correct SPU status. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + + status_code = + (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF; + if ((csa->prob.spu_status_R & status_P_I) == status_P_I) { + + /* SPU_Status[P,I]=1 - Illegal Instruction followed + * by Stop and Signal instruction, followed by 'br -4'. + * + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) { + + /* SPU_Status[P,H]=1 - Halt Conditional, followed + * by Stop and Signal instruction, followed by + * 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) { + + /* SPU_Status[S,P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) { + + /* SPU_Status[S,I]=1 - Illegal instruction followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P) == status_P) { + + /* SPU_Status[P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_H) == status_H) { + + /* SPU_Status[H]=1 - Halt Conditional, followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H; + + } else if ((csa->prob.spu_status_R & status_S) == status_S) { + + /* SPU_Status[S]=1 - Two nop instructions. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S; + + } else if ((csa->prob.spu_status_R & status_I) == status_I) { + + /* SPU_Status[I]=1 - Illegal instruction followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I; + + } +} + +static inline void setup_spu_status_part2(struct spu_state *csa, + struct spu *spu) +{ + u32 mask; + + /* Restore, Step 28: + * If the CSA.SPU_Status[I,S,H,P,R]=0 then + * add a 'br *' instruction to the end of + * the SPU based restore code. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R; + } +} + +static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 29: + * Restore RA_GROUP_ID register and the + * RA_ENABLE reigster from the CSA. + */ + spu_resource_allocation_groupID_set(spu, + csa->priv1.resource_allocation_groupID_RW); + spu_resource_allocation_enable_set(spu, + csa->priv1.resource_allocation_enable_RW); +} + +static inline void send_restore_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_restore_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_restore_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Restore, Step 37: + * Issue MFC DMA command to copy context + * restore code to local storage. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void setup_decr(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 34: + * If CSA.MFC_CNTL[Ds]=1 (decrementer was + * running) then adjust decrementer, set + * decrementer running status in LSCSA, + * and set decrementer "wrapped" status + * in LSCSA. + */ + if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) { + cycles_t resume_time = get_cycles(); + cycles_t delta_time = resume_time - csa->suspend_time; + + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; + } +} + +static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 35: + * Copy the CSA.PU_MB data into the LSCSA. + */ + csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R; +} + +static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 36: + * Copy the CSA.PUINT_MB data into the LSCSA. + */ + csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R; +} + +static inline int check_restore_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Restore, Step 40: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context restore succeeded, otherwise context restore + * failed. + */ + complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 41: + * Restore SPU_PrivCntl from the CSA. + */ + out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW); + eieio(); +} + +static inline void restore_status_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 42: + * If any CSA.SPU_Status[I,S,H,P]=1, then + * restore the error or single step state. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (csa->prob.spu_status_R & mask) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_status_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 43: + * If all CSA.SPU_Status[I,S,H,P,R]=0 then write + * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1, + * then write '00' to SPU_RunCntl[R0R1] and wait + * for SPU_Status[R]=0. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GET_CMD; + + /* Restore, Step 44: + * Issue a DMA command to restore the first + * 16kb of local storage from CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + +static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 49: + * Write INT_MASK_class0 with value of 0. + * Write INT_MASK_class1 with value of 0. + * Write INT_MASK_class2 with value of 0. + * Write INT_STAT_class0 with value of -1. + * Write INT_STAT_class1 with value of -1. + * Write INT_STAT_class2 with value of -1. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spin_unlock_irq(&spu->register_lock); +} + +static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 50: + * If MFC_Cntl[Se]!=0 then restore + * MFC command queues. + */ + if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) { + for (i = 0; i < 8; i++) { + out_be64(&priv2->puq[i].mfc_cq_data0_RW, + csa->priv2.puq[i].mfc_cq_data0_RW); + out_be64(&priv2->puq[i].mfc_cq_data1_RW, + csa->priv2.puq[i].mfc_cq_data1_RW); + out_be64(&priv2->puq[i].mfc_cq_data2_RW, + csa->priv2.puq[i].mfc_cq_data2_RW); + out_be64(&priv2->puq[i].mfc_cq_data3_RW, + csa->priv2.puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + out_be64(&priv2->spuq[i].mfc_cq_data0_RW, + csa->priv2.spuq[i].mfc_cq_data0_RW); + out_be64(&priv2->spuq[i].mfc_cq_data1_RW, + csa->priv2.spuq[i].mfc_cq_data1_RW); + out_be64(&priv2->spuq[i].mfc_cq_data2_RW, + csa->priv2.spuq[i].mfc_cq_data2_RW); + out_be64(&priv2->spuq[i].mfc_cq_data3_RW, + csa->priv2.spuq[i].mfc_cq_data3_RW); + } + } + eieio(); +} + +static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 51: + * Restore the PPU_QueryMask register from CSA. + */ + out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW); + eieio(); +} + +static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 52: + * Restore the PPU_QueryType register from CSA. + */ + out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW); + eieio(); +} + +static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 53: + * Restore the MFC_CSR_TSQ register from CSA. + */ + out_be64(&priv2->spu_tag_status_query_RW, + csa->priv2.spu_tag_status_query_RW); + eieio(); +} + +static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 54: + * Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers from CSA. + */ + out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW); + out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW); + eieio(); +} + +static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 55: + * Restore the MFC_CSR_ATO register from CSA. + */ + out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW); +} + +static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 56: + * Restore the MFC_TCLASS_ID register from CSA. + */ + spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW); + eieio(); +} + +static inline void set_llr_event(struct spu_state *csa, struct spu *spu) +{ + u64 ch0_cnt, ch0_data; + u64 ch1_data; + + /* Restore, Step 57: + * Set the Lock Line Reservation Lost Event by: + * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1. + * 2. If CSA.SPU_Channel_0_Count=0 and + * CSA.SPU_Wr_Event_Mask[Lr]=1 and + * CSA.SPU_Event_Status[Lr]=0 then set + * CSA.SPU_Event_Status_Count=1. + */ + ch0_cnt = csa->spu_chnlcnt_RW[0]; + ch0_data = csa->spu_chnldata_RW[0]; + ch1_data = csa->spu_chnldata_RW[1]; + csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT; + if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) && + (ch1_data & MFC_LLR_LOST_EVENT)) { + csa->spu_chnlcnt_RW[0] = 1; + } +} + +static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 58: + * If the status of the CSA software decrementer + * "wrapped" flag is set, OR in a '1' to + * CSA.SPU_Event_Status[Tm]. + */ + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) + csa->spu_chnlcnt_RW[0] = 1; + + csa->spu_chnldata_RW[0] |= 0x20; +} + +static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] + */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]); + eieio(); + } +} + +static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[3] = { 9UL, 21UL, 23UL }; + u64 ch_counts[3] = { 1UL, 16UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 60: + * Restore the following CH: [9,21,23]. + */ + ch_counts[0] = 1UL; + ch_counts[1] = csa->spu_chnlcnt_RW[21]; + ch_counts[2] = 1UL; + for (i = 0; i < 3; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 61: + * Restore the SPU_LSLR register from CSA. + */ + out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW); + eieio(); +} + +static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 62: + * Restore the SPU_Cfg register from CSA. + */ + out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW); + eieio(); +} + +static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 63: + * Restore PM_Trace_Tag_Wait_Mask from CSA. + * Not performed by this implementation. + */ +} + +static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 64: + * Restore SPU_NPC from CSA. + */ + out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW); + eieio(); +} + +static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 65: + * Restore MFC_RdSPU_MB from CSA. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]); + for (i = 0; i < 4; i++) { + out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]); + } + eieio(); +} + +static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 dummy = 0; + + /* Restore, Step 66: + * If CSA.MB_Stat[P]=0 (mailbox empty) then + * read from the PPU_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF) == 0) { + dummy = in_be32(&prob->pu_mb_R); + eieio(); + } +} + +static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 dummy = 0UL; + + /* Restore, Step 66: + * If CSA.MB_Stat[I]=0 (mailbox empty) then + * read from the PPUINT_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { + dummy = in_be64(&priv2->puint_mb_R); + eieio(); + spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + eieio(); + } +} + +static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 69: + * Restore the MFC_SR1 register from CSA. + */ + spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW); + eieio(); +} + +static inline void set_int_route(struct spu_state *csa, struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + spu_cpu_affinity_set(spu, ctx->last_ran); +} + +static inline void restore_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Restore, Step 70: + * Restore other SPU mappings to this SPU. TBD. + */ +} + +static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 71: + * If CSA.SPU_Status[R]=1 then write + * SPU_RunCntl[R0R1]='01'. + */ + if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + } +} + +static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 72: + * Restore the MFC_CNTL register for the CSA. + */ + out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW); + eieio(); + + /* + * The queue is put back into the same state that was evident prior to + * the context switch. The suspend flag is added to the saved state in + * the csa, if the operational state was suspending or suspended. In + * this case, the code that suspended the mfc is responsible for + * continuing it. Note that SPE faults do not change the operational + * state of the spu. + */ +} + +static inline void enable_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 73: + * Enable user-space access (if provided) to this + * SPU by mapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 74: + * Reset the "context switch active" flag. + * Not performed by this implementation. + */ +} + +static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 75: + * Re-enable SPU interrupts. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW); + spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW); + spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW); + spin_unlock_irq(&spu->register_lock); +} + +static int quiece_spu(struct spu_state *prev, struct spu *spu) +{ + /* + * Combined steps 2-18 of SPU context save sequence, which + * quiesce the SPU state (disable SPU execution, MFC command + * queues, decrementer, SPU interrupts, etc.). + * + * Returns 0 on success. + * 2 if failed step 2. + * 6 if failed step 6. + */ + + if (check_spu_isolate(prev, spu)) { /* Step 2. */ + return 2; + } + disable_interrupts(prev, spu); /* Step 3. */ + set_watchdog_timer(prev, spu); /* Step 4. */ + inhibit_user_access(prev, spu); /* Step 5. */ + if (check_spu_isolate(prev, spu)) { /* Step 6. */ + return 6; + } + set_switch_pending(prev, spu); /* Step 7. */ + save_mfc_cntl(prev, spu); /* Step 8. */ + save_spu_runcntl(prev, spu); /* Step 9. */ + save_mfc_sr1(prev, spu); /* Step 10. */ + save_spu_status(prev, spu); /* Step 11. */ + save_mfc_stopped_status(prev, spu); /* Step 12. */ + halt_mfc_decr(prev, spu); /* Step 13. */ + save_timebase(prev, spu); /* Step 14. */ + remove_other_spu_access(prev, spu); /* Step 15. */ + do_mfc_mssync(prev, spu); /* Step 16. */ + issue_mfc_tlbie(prev, spu); /* Step 17. */ + handle_pending_interrupts(prev, spu); /* Step 18. */ + + return 0; +} + +static void save_csa(struct spu_state *prev, struct spu *spu) +{ + /* + * Combine steps 19-44 of SPU context save sequence, which + * save regions of the privileged & problem state areas. + */ + + save_mfc_queues(prev, spu); /* Step 19. */ + save_ppu_querymask(prev, spu); /* Step 20. */ + save_ppu_querytype(prev, spu); /* Step 21. */ + save_ppu_tagstatus(prev, spu); /* NEW. */ + save_mfc_csr_tsq(prev, spu); /* Step 22. */ + save_mfc_csr_cmd(prev, spu); /* Step 23. */ + save_mfc_csr_ato(prev, spu); /* Step 24. */ + save_mfc_tclass_id(prev, spu); /* Step 25. */ + set_mfc_tclass_id(prev, spu); /* Step 26. */ + save_mfc_cmd(prev, spu); /* Step 26a - moved from 44. */ + purge_mfc_queue(prev, spu); /* Step 27. */ + wait_purge_complete(prev, spu); /* Step 28. */ + setup_mfc_sr1(prev, spu); /* Step 30. */ + save_spu_npc(prev, spu); /* Step 31. */ + save_spu_privcntl(prev, spu); /* Step 32. */ + reset_spu_privcntl(prev, spu); /* Step 33. */ + save_spu_lslr(prev, spu); /* Step 34. */ + reset_spu_lslr(prev, spu); /* Step 35. */ + save_spu_cfg(prev, spu); /* Step 36. */ + save_pm_trace(prev, spu); /* Step 37. */ + save_mfc_rag(prev, spu); /* Step 38. */ + save_ppu_mb_stat(prev, spu); /* Step 39. */ + save_ppu_mb(prev, spu); /* Step 40. */ + save_ppuint_mb(prev, spu); /* Step 41. */ + save_ch_part1(prev, spu); /* Step 42. */ + save_spu_mb(prev, spu); /* Step 43. */ + reset_ch(prev, spu); /* Step 45. */ +} + +static void save_lscsa(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 46-57 of SPU context save sequence, + * which save regions of the local store and register + * file. + */ + + resume_mfc_queue(prev, spu); /* Step 46. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); + set_switch_active(prev, spu); /* Step 48. */ + enable_interrupts(prev, spu); /* Step 49. */ + save_ls_16kb(prev, spu); /* Step 50. */ + set_spu_npc(prev, spu); /* Step 51. */ + set_signot1(prev, spu); /* Step 52. */ + set_signot2(prev, spu); /* Step 53. */ + send_save_code(prev, spu); /* Step 54. */ + set_ppu_querymask(prev, spu); /* Step 55. */ + wait_tag_complete(prev, spu); /* Step 56. */ + wait_spu_stopped(prev, spu); /* Step 57. */ +} + +static void force_spu_isolate_exit(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Stop SPE execution and wait for completion. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + iobarrier_rw(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + /* Restart SPE master runcntl. */ + spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); + iobarrier_w(); + + /* Initiate isolate exit request and wait for completion. */ + out_be64(&priv2->spu_privcntl_RW, 4LL); + iobarrier_w(); + out_be32(&prob->spu_runcntl_RW, 2); + iobarrier_rw(); + POLL_WHILE_FALSE((in_be32(&prob->spu_status_R) + & SPU_STATUS_STOPPED_BY_STOP)); + + /* Reset load request to normal. */ + out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL); + iobarrier_w(); +} + +/** + * stop_spu_isolate + * Check SPU run-control state and force isolated + * exit function as necessary. + */ +static void stop_spu_isolate(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) { + /* The SPU is in isolated state; the only way + * to get it out is to perform an isolated + * exit (clean) operation. + */ + force_spu_isolate_exit(spu); + } +} + +static void harvest(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 2-25 of SPU context restore sequence, + * which resets an SPU either after a failed save, or + * when using SPU for first time. + */ + + disable_interrupts(prev, spu); /* Step 2. */ + inhibit_user_access(prev, spu); /* Step 3. */ + terminate_spu_app(prev, spu); /* Step 4. */ + set_switch_pending(prev, spu); /* Step 5. */ + stop_spu_isolate(spu); /* NEW. */ + remove_other_spu_access(prev, spu); /* Step 6. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ + wait_suspend_mfc_complete(prev, spu); /* Step 8. */ + if (!suspend_spe(prev, spu)) /* Step 9. */ + clear_spu_status(prev, spu); /* Step 10. */ + do_mfc_mssync(prev, spu); /* Step 11. */ + issue_mfc_tlbie(prev, spu); /* Step 12. */ + handle_pending_interrupts(prev, spu); /* Step 13. */ + purge_mfc_queue(prev, spu); /* Step 14. */ + wait_purge_complete(prev, spu); /* Step 15. */ + reset_spu_privcntl(prev, spu); /* Step 16. */ + reset_spu_lslr(prev, spu); /* Step 17. */ + setup_mfc_sr1(prev, spu); /* Step 18. */ + spu_invalidate_slbs(spu); /* Step 19. */ + reset_ch_part1(prev, spu); /* Step 20. */ + reset_ch_part2(prev, spu); /* Step 21. */ + enable_interrupts(prev, spu); /* Step 22. */ + set_switch_active(prev, spu); /* Step 23. */ + set_mfc_tclass_id(prev, spu); /* Step 24. */ + resume_mfc_queue(prev, spu); /* Step 25. */ +} + +static void restore_lscsa(struct spu_state *next, struct spu *spu) +{ + /* + * Perform steps 26-40 of SPU context restore sequence, + * which restores regions of the local store and register + * file. + */ + + set_watchdog_timer(next, spu); /* Step 26. */ + setup_spu_status_part1(next, spu); /* Step 27. */ + setup_spu_status_part2(next, spu); /* Step 28. */ + restore_mfc_rag(next, spu); /* Step 29. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); + set_spu_npc(next, spu); /* Step 31. */ + set_signot1(next, spu); /* Step 32. */ + set_signot2(next, spu); /* Step 33. */ + setup_decr(next, spu); /* Step 34. */ + setup_ppu_mb(next, spu); /* Step 35. */ + setup_ppuint_mb(next, spu); /* Step 36. */ + send_restore_code(next, spu); /* Step 37. */ + set_ppu_querymask(next, spu); /* Step 38. */ + wait_tag_complete(next, spu); /* Step 39. */ + wait_spu_stopped(next, spu); /* Step 40. */ +} + +static void restore_csa(struct spu_state *next, struct spu *spu) +{ + /* + * Combine steps 41-76 of SPU context restore sequence, which + * restore regions of the privileged & problem state areas. + */ + + restore_spu_privcntl(next, spu); /* Step 41. */ + restore_status_part1(next, spu); /* Step 42. */ + restore_status_part2(next, spu); /* Step 43. */ + restore_ls_16kb(next, spu); /* Step 44. */ + wait_tag_complete(next, spu); /* Step 45. */ + suspend_mfc(next, spu); /* Step 46. */ + wait_suspend_mfc_complete(next, spu); /* Step 47. */ + issue_mfc_tlbie(next, spu); /* Step 48. */ + clear_interrupts(next, spu); /* Step 49. */ + restore_mfc_queues(next, spu); /* Step 50. */ + restore_ppu_querymask(next, spu); /* Step 51. */ + restore_ppu_querytype(next, spu); /* Step 52. */ + restore_mfc_csr_tsq(next, spu); /* Step 53. */ + restore_mfc_csr_cmd(next, spu); /* Step 54. */ + restore_mfc_csr_ato(next, spu); /* Step 55. */ + restore_mfc_tclass_id(next, spu); /* Step 56. */ + set_llr_event(next, spu); /* Step 57. */ + restore_decr_wrapped(next, spu); /* Step 58. */ + restore_ch_part1(next, spu); /* Step 59. */ + restore_ch_part2(next, spu); /* Step 60. */ + restore_spu_lslr(next, spu); /* Step 61. */ + restore_spu_cfg(next, spu); /* Step 62. */ + restore_pm_trace(next, spu); /* Step 63. */ + restore_spu_npc(next, spu); /* Step 64. */ + restore_spu_mb(next, spu); /* Step 65. */ + check_ppu_mb_stat(next, spu); /* Step 66. */ + check_ppuint_mb_stat(next, spu); /* Step 67. */ + spu_invalidate_slbs(spu); /* Modified Step 68. */ + restore_mfc_sr1(next, spu); /* Step 69. */ + set_int_route(next, spu); /* NEW */ + restore_other_spu_access(next, spu); /* Step 70. */ + restore_spu_runcntl(next, spu); /* Step 71. */ + restore_mfc_cntl(next, spu); /* Step 72. */ + enable_user_access(next, spu); /* Step 73. */ + reset_switch_active(next, spu); /* Step 74. */ + reenable_interrupts(next, spu); /* Step 75. */ +} + +static int __do_spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + /* + * SPU context save can be broken into three phases: + * + * (a) quiesce [steps 2-16]. + * (b) save of CSA, performed by PPE [steps 17-42] + * (c) save of LSCSA, mostly performed by SPU [steps 43-52]. + * + * Returns 0 on success. + * 2,6 if failed to quiece SPU + * 53 if SPU-side of save failed. + */ + + rc = quiece_spu(prev, spu); /* Steps 2-16. */ + switch (rc) { + default: + case 2: + case 6: + harvest(prev, spu); + return rc; + break; + case 0: + break; + } + save_csa(prev, spu); /* Steps 17-43. */ + save_lscsa(prev, spu); /* Steps 44-53. */ + return check_save_status(prev, spu); /* Step 54. */ +} + +static int __do_spu_restore(struct spu_state *next, struct spu *spu) +{ + int rc; + + /* + * SPU context restore can be broken into three phases: + * + * (a) harvest (or reset) SPU [steps 2-24]. + * (b) restore LSCSA [steps 25-40], mostly performed by SPU. + * (c) restore CSA [steps 41-76], performed by PPE. + * + * The 'harvest' step is not performed here, but rather + * as needed below. + */ + + restore_lscsa(next, spu); /* Steps 24-39. */ + rc = check_restore_status(next, spu); /* Step 40. */ + switch (rc) { + default: + /* Failed. Return now. */ + return rc; + break; + case 0: + /* Fall through to next step. */ + break; + } + restore_csa(next, spu); + + return 0; +} + +/** + * spu_save - SPU context save, with locking. + * @prev: pointer to SPU context save area, to be saved. + * @spu: pointer to SPU iomem structure. + * + * Acquire locks, perform the save operation then return. + */ +int spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); /* Step 1. */ + rc = __do_spu_save(prev, spu); /* Steps 2-53. */ + release_spu_lock(spu); + if (rc != 0 && rc != 2 && rc != 6) { + panic("%s failed on SPU[%d], rc=%d.\n", + __func__, spu->number, rc); + } + return 0; +} +EXPORT_SYMBOL_GPL(spu_save); + +/** + * spu_restore - SPU context restore, with harvest and locking. + * @new: pointer to SPU context save area, to be restored. + * @spu: pointer to SPU iomem structure. + * + * Perform harvest + restore, as we may not be coming + * from a previous successful save operation, and the + * hardware state is unknown. + */ +int spu_restore(struct spu_state *new, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); + harvest(NULL, spu); + spu->slb_replace = 0; + rc = __do_spu_restore(new, spu); + release_spu_lock(spu); + if (rc) { + panic("%s failed on SPU[%d] rc=%d.\n", + __func__, spu->number, rc); + } + return rc; +} +EXPORT_SYMBOL_GPL(spu_restore); + +static void init_prob(struct spu_state *csa) +{ + csa->spu_chnlcnt_RW[9] = 1; + csa->spu_chnlcnt_RW[21] = 16; + csa->spu_chnlcnt_RW[23] = 1; + csa->spu_chnlcnt_RW[28] = 1; + csa->spu_chnlcnt_RW[30] = 1; + csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP; + csa->prob.mb_stat_R = 0x000400; +} + +static void init_priv1(struct spu_state *csa) +{ + /* Enable decode, relocate, tlbie response, master runcntl. */ + csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK | + MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_PROBLEM_STATE_MASK | + MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK; + + /* Enable OS-specific set of interrupts. */ + csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR | + CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR | + CLASS0_ENABLE_SPU_ERROR_INTR; + csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR | + CLASS2_ENABLE_SPU_HALT_INTR | + CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR; +} + +static void init_priv2(struct spu_state *csa) +{ + csa->priv2.spu_lslr_RW = LS_ADDR_MASK; + csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE | + MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION | + MFC_CNTL_DMA_QUEUES_EMPTY_MASK; +} + +/** + * spu_alloc_csa - allocate and initialize an SPU context save area. + * + * Allocate and initialize the contents of an SPU context save area. + * This includes enabling address translation, interrupt masks, etc., + * as appropriate for the given OS environment. + * + * Note that storage for the 'lscsa' is allocated separately, + * as it is by far the largest of the context save regions, + * and may need to be pinned or otherwise specially aligned. + */ +int spu_init_csa(struct spu_state *csa) +{ + int rc; + + if (!csa) + return -EINVAL; + memset(csa, 0, sizeof(struct spu_state)); + + rc = spu_alloc_lscsa(csa); + if (rc) + return rc; + + spin_lock_init(&csa->register_lock); + + init_prob(csa); + init_priv1(csa); + init_priv2(csa); + + return 0; +} + +void spu_fini_csa(struct spu_state *csa) +{ + spu_free_lscsa(csa); +} diff --git a/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c b/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c new file mode 100644 index 000000000..a87200a53 --- /dev/null +++ b/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -0,0 +1,88 @@ +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/export.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/slab.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +/** + * sys_spu_run - run code loaded into an SPU + * + * @unpc: next program counter for the SPU + * @ustatus: status of the SPU + * + * This system call transfers the control of execution of a + * user space thread to an SPU. It will return when the + * SPU has finished executing or when it hits an error + * condition and it will be interrupted if a signal needs + * to be delivered to a handler in user space. + * + * The next program counter is set to the passed value + * before the SPU starts fetching code and the user space + * pointer gets updated with the new value when returning + * from kernel space. + * + * The status value returned from spu_run reflects the + * value of the spu_status register after the SPU has stopped. + * + */ +static long do_spu_run(struct file *filp, + __u32 __user *unpc, + __u32 __user *ustatus) +{ + long ret; + struct spufs_inode_info *i; + u32 npc, status; + + ret = -EFAULT; + if (get_user(npc, unpc)) + goto out; + + /* check if this file was created by spu_create */ + ret = -EINVAL; + if (filp->f_op != &spufs_context_fops) + goto out; + + i = SPUFS_I(file_inode(filp)); + ret = spufs_run_spu(i->i_ctx, &npc, &status); + + if (put_user(npc, unpc)) + ret = -EFAULT; + + if (ustatus && put_user(status, ustatus)) + ret = -EFAULT; +out: + return ret; +} + +static long do_spu_create(const char __user *pathname, unsigned int flags, + umode_t mode, struct file *neighbor) +{ + struct path path; + struct dentry *dentry; + int ret; + + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + ret = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + ret = spufs_create(&path, dentry, flags, mode, neighbor); + done_path_create(&path, dentry); + } + + return ret; +} + +struct spufs_calls spufs_calls = { + .create_thread = do_spu_create, + .spu_run = do_spu_run, + .notify_spus_active = do_notify_spus_active, + .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif +}; |