summaryrefslogtreecommitdiffstats
path: root/kernel/arch/powerpc/platforms/cell
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/arch/powerpc/platforms/cell')
-rw-r--r--kernel/arch/powerpc/platforms/cell/Kconfig129
-rw-r--r--kernel/arch/powerpc/platforms/cell/Makefile31
-rw-r--r--kernel/arch/powerpc/platforms/cell/axon_msi.c492
-rw-r--r--kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c118
-rw-r--r--kernel/arch/powerpc/platforms/cell/cbe_regs.c281
-rw-r--r--kernel/arch/powerpc/platforms/cell/cbe_thermal.c399
-rw-r--r--kernel/arch/powerpc/platforms/cell/cell.h24
-rw-r--r--kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c171
-rw-r--r--kernel/arch/powerpc/platforms/cell/interrupt.c411
-rw-r--r--kernel/arch/powerpc/platforms/cell/interrupt.h89
-rw-r--r--kernel/arch/powerpc/platforms/cell/iommu.c1237
-rw-r--r--kernel/arch/powerpc/platforms/cell/pervasive.c133
-rw-r--r--kernel/arch/powerpc/platforms/cell/pervasive.h42
-rw-r--r--kernel/arch/powerpc/platforms/cell/pmu.c424
-rw-r--r--kernel/arch/powerpc/platforms/cell/qpace_setup.c148
-rw-r--r--kernel/arch/powerpc/platforms/cell/ras.c356
-rw-r--r--kernel/arch/powerpc/platforms/cell/ras.h9
-rw-r--r--kernel/arch/powerpc/platforms/cell/setup.c286
-rw-r--r--kernel/arch/powerpc/platforms/cell/smp.c168
-rw-r--r--kernel/arch/powerpc/platforms/cell/spider-pci.c184
-rw-r--r--kernel/arch/powerpc/platforms/cell/spider-pic.c359
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_base.c811
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_callbacks.c74
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_manage.c555
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_notify.c68
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c180
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h26
-rw-r--r--kernel/arch/powerpc/platforms/cell/spu_syscalls.c178
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/.gitignore2
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/Makefile62
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c413
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/context.c186
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/coredump.c211
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/fault.c191
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/file.c2771
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/gang.c87
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c349
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/inode.c811
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c183
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/run.c454
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/sched.c1172
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c336
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S116
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped935
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_save.c195
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S102
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped743
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h160
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/spufs.h376
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/sputrace.h39
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/switch.c2222
-rw-r--r--kernel/arch/powerpc/platforms/cell/spufs/syscalls.c88
52 files changed, 19587 insertions, 0 deletions
diff --git a/kernel/arch/powerpc/platforms/cell/Kconfig b/kernel/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 000000000..2f23133ab
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,129 @@
+config PPC_CELL
+ bool
+ default n
+
+config PPC_CELL_COMMON
+ bool
+ select PPC_CELL
+ select PPC_DCR_MMIO
+ select PPC_INDIRECT_PIO
+ select PPC_INDIRECT_MMIO
+ select PPC_NATIVE
+ select PPC_RTAS
+ select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+ bool
+ select PPC_CELL_COMMON
+ select MPIC
+ select PPC_IO_WORKAROUNDS
+ select IBM_EMAC_EMAC4
+ select IBM_EMAC_RGMII
+ select IBM_EMAC_ZMII #test only
+ select IBM_EMAC_TAH #test only
+ default n
+
+config PPC_IBM_CELL_BLADE
+ bool "IBM Cell Blade"
+ depends on PPC64 && PPC_BOOK3S
+ select PPC_CELL_NATIVE
+ select PPC_OF_PLATFORM_PCI
+ select PCI
+ select MMIO_NVRAM
+ select PPC_UDBG_16550
+ select UDBG_RTAS_CONSOLE
+
+config PPC_CELL_QPACE
+ bool "IBM Cell - QPACE"
+ depends on PPC64 && PPC_BOOK3S
+ select PPC_CELL_COMMON
+
+config AXON_MSI
+ bool
+ depends on PPC_IBM_CELL_BLADE && PCI_MSI
+ default y
+
+menu "Cell Broadband Engine options"
+ depends on PPC_CELL
+
+config SPU_FS
+ tristate "SPU file system"
+ default m
+ depends on PPC_CELL
+ select SPU_BASE
+ select MEMORY_HOTPLUG
+ help
+ The SPU file system is used to access Synergistic Processing
+ Units on machines implementing the Broadband Processor
+ Architecture.
+
+config SPU_FS_64K_LS
+ bool "Use 64K pages to map SPE local store"
+ # we depend on PPC_MM_SLICES for now rather than selecting
+ # it because we depend on hugetlbfs hooks being present. We
+ # will fix that when the generic code has been improved to
+ # not require hijacking hugetlbfs hooks.
+ depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+ default y
+ select PPC_HAS_HASH_64K
+ help
+ This option causes SPE local stores to be mapped in process
+ address spaces using 64K pages while the rest of the kernel
+ uses 4K pages. This can improve the performance of applications
+ using multiple SPEs by lowering the TLB pressure on them.
+
+config SPU_BASE
+ bool
+ default n
+ select PPC_COPRO_BASE
+
+config CBE_RAS
+ bool "RAS features for bare metal Cell BE"
+ depends on PPC_CELL_NATIVE
+ default y
+
+config PPC_IBM_CELL_RESETBUTTON
+ bool "IBM Cell Blade Pinhole reset button"
+ depends on CBE_RAS && PPC_IBM_CELL_BLADE
+ default y
+ help
+ Support Pinhole Resetbutton on IBM Cell blades.
+ This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+ tristate "IBM Cell Blade power button"
+ depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+ default y
+ help
+ Support Powerbutton on IBM Cell blades.
+ This will enable the powerbutton as an input device.
+
+config CBE_THERM
+ tristate "CBE thermal support"
+ default m
+ depends on CBE_RAS && SPU_BASE
+
+config PPC_PMI
+ tristate
+ default y
+ depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
+ help
+ PMI (Platform Management Interrupt) is a way to
+ communicate with the BMC (Baseboard Management Controller).
+ It is used in some IBM Cell blades.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+ tristate "CBE frequency scaling based on SPU usage"
+ depends on SPU_FS && CPU_FREQ
+ default m
+ help
+ This governor checks for spu usage to adjust the cpu frequency.
+ If no spu is running on a given cpu, that cpu will be throttled to
+ the minimal possible frequency.
+
+endmenu
+
+config OPROFILE_CELL
+ def_bool y
+ depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE
+
diff --git a/kernel/arch/powerpc/platforms/cell/Makefile b/kernel/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 000000000..34699bddf
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,31 @@
+obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o
+
+obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
+ pmu.o spider-pci.o
+obj-$(CONFIG_CBE_RAS) += ras.o
+
+obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
+
+ifeq ($(CONFIG_SMP),y)
+obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
+obj-$(CONFIG_PPC_CELL_QPACE) += smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o
+spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o
+
+obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
+ spu_notify.o \
+ spu_syscalls.o \
+ $(spu-priv1-y) \
+ $(spu-manage-y) \
+ spufs/
+
+obj-$(CONFIG_AXON_MSI) += axon_msi.o
+
+# qpace setup
+obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o
diff --git a/kernel/arch/powerpc/platforms/cell/axon_msi.c b/kernel/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 000000000..623bd9614
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/of_platform.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG 0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG 0x3
+#define MSIC_BASE_ADDR_LO_REG 0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG 0x5
+#define MSIC_WRITE_OFFSET_REG 0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE 0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
+#define MSIC_CTRL_IRQ_ENABLE 0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT 16
+#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE 0x10
+
+
+/*
+ * Driver state for one MSI controller (MSIC) instance; allocated and filled
+ * in by axon_msi_probe().
+ */
+struct axon_msic {
+ /* irq domain the per-device MSI virqs are created in */
+ struct irq_domain *irq_domain;
+ /* CPU-side view of the FIFO buffer; entries are stored little-endian */
+ __le32 *fifo_virt;
+ /* DMA address of the FIFO, programmed into the MSIC base address regs */
+ dma_addr_t fifo_phys;
+ /* mapped DCR range through which the MSIC registers are accessed */
+ dcr_host_t dcr_host;
+ /* byte offset into the FIFO of the next entry to be consumed */
+ u32 read_offset;
+#ifdef DEBUG
+ /* ioremap()ed word that the debugfs "msic_N" file writes to */
+ u32 __iomem *trigger;
+#endif
+};
+
+/*
+ * Debug hook called at the end of probe. The real implementation lives at
+ * the bottom of this file and is only built with DEBUG defined; otherwise
+ * an empty inline stub is used.
+ */
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+ struct axon_msic *msic) { }
+#endif
+
+
+/*
+ * Write @val to MSIC register @dcr_n (an offset within the DCR range held
+ * in msic->dcr_host), tracing the access through pr_devel().
+ */
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+	/* Trace in the same (register, value) order as the write below. */
+	pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", dcr_n, val);
+
+	dcr_write(msic->dcr_host, dcr_n, val);
+}
+
+/*
+ * Chained handler for the MSIC interrupt.
+ *
+ * Drains the FIFO from msic->read_offset up to the hardware write offset.
+ * Each FIFO entry is MSIC_FIFO_ENTRY_SIZE bytes; the low 16 bits of its
+ * first word are taken as the virq to dispatch. Entries that do not (yet)
+ * name a virq owned by this MSIC are polled again, up to 100 times, before
+ * the entry is given up on and skipped.
+ */
+static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct axon_msic *msic = irq_get_handler_data(irq);
+ u32 write_offset, msi;
+ int idx;
+ int retry = 0;
+
+ write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+ pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
+
+ /* write_offset doesn't wrap properly, so we have to mask it */
+ write_offset &= MSIC_FIFO_SIZE_MASK;
+
+ while (msic->read_offset != write_offset && retry < 100) {
+ /* read_offset is in bytes, fifo_virt is indexed in 32-bit words */
+ idx = msic->read_offset / sizeof(__le32);
+ msi = le32_to_cpu(msic->fifo_virt[idx]);
+ msi &= 0xFFFF;
+
+ pr_devel("axon_msi: woff %x roff %x msi %x\n",
+ write_offset, msic->read_offset, msi);
+
+ /* only dispatch virqs that were mapped through this MSIC's domain */
+ if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
+ generic_handle_irq(msi);
+ /* restore the "empty" pattern the FIFO was initialised with */
+ msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+ } else {
+ /*
+ * Reading the MSIC_WRITE_OFFSET_REG does not
+ * reliably flush the outstanding DMA to the
+ * FIFO buffer. Here we were reading stale
+ * data, so we need to retry.
+ */
+ udelay(1);
+ retry++;
+ pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
+ continue;
+ }
+
+ if (retry) {
+ pr_devel("axon_msi: late irq 0x%x, retry %d\n",
+ msi, retry);
+ retry = 0;
+ }
+
+ /* advance to the next entry, wrapping inside the FIFO */
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ /* non-zero here means the retry budget ran out: skip the bad entry */
+ if (retry) {
+ printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Locate the MSIC responsible for @dev.
+ *
+ * Walks from the device's OF node towards the root looking for an
+ * "msi-translator" property, resolves the phandle it holds, and returns the
+ * host_data of the irq domain registered for that node.
+ *
+ * Returns the axon_msic, or NULL if any step of the lookup fails.
+ */
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+ struct irq_domain *irq_domain;
+ struct device_node *dn, *tmp;
+ const phandle *ph;
+ struct axon_msic *msic = NULL;
+
+ dn = of_node_get(pci_device_to_OF_node(dev));
+ if (!dn) {
+ dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+ return NULL;
+ }
+
+ /* climb the tree until a node carries the property (or we run out) */
+ for (; dn; dn = of_get_next_parent(dn)) {
+ ph = of_get_property(dn, "msi-translator", NULL);
+ if (ph)
+ break;
+ }
+
+ if (!ph) {
+ dev_dbg(&dev->dev,
+ "axon_msi: no msi-translator property found\n");
+ goto out_error;
+ }
+
+ /* swap the reference on the property's node for one on its target */
+ tmp = dn;
+ dn = of_find_node_by_phandle(*ph);
+ of_node_put(tmp);
+ if (!dn) {
+ dev_dbg(&dev->dev,
+ "axon_msi: msi-translator doesn't point to a node\n");
+ goto out_error;
+ }
+
+ irq_domain = irq_find_host(dn);
+ if (!irq_domain) {
+ dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n",
+ dn->full_name);
+ goto out_error;
+ }
+
+ msic = irq_domain->host_data;
+
+out_error:
+ /* reached on success as well; dn may be NULL here */
+ of_node_put(dn);
+
+ return msic;
+}
+
+/*
+ * Fill in the address part of @msg for @dev from the device tree.
+ *
+ * Walks from the device's OF node towards the root. If the first MSI
+ * descriptor of the device is 64-bit capable, "msi-address-64" is tried
+ * before "msi-address-32" on each node. The property must be 4 or 8 bytes
+ * long.
+ *
+ * Returns 0 on success, -ENODEV if the device has no OF node, -ENOENT if
+ * no address property is found, -EINVAL if the property has another length.
+ */
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+ struct device_node *dn;
+ struct msi_desc *entry;
+ int len;
+ const u32 *prop;
+
+ dn = of_node_get(pci_device_to_OF_node(dev));
+ if (!dn) {
+ dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+ return -ENODEV;
+ }
+
+ entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
+
+ for (; dn; dn = of_get_next_parent(dn)) {
+ if (entry->msi_attrib.is_64) {
+ prop = of_get_property(dn, "msi-address-64", &len);
+ if (prop)
+ break;
+ }
+
+ prop = of_get_property(dn, "msi-address-32", &len);
+ if (prop)
+ break;
+ }
+
+ /* the loop only ends without a property once dn has become NULL */
+ if (!prop) {
+ dev_dbg(&dev->dev,
+ "axon_msi: no msi-address-(32|64) properties found\n");
+ return -ENOENT;
+ }
+
+ switch (len) {
+ case 8:
+ /* two cells: high word first, then low word */
+ msg->address_hi = prop[0];
+ msg->address_lo = prop[1];
+ break;
+ case 4:
+ msg->address_hi = 0;
+ msg->address_lo = prop[0];
+ break;
+ default:
+ dev_dbg(&dev->dev,
+ "axon_msi: malformed msi-address-(32|64) property\n");
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ of_node_put(dn);
+
+ return 0;
+}
+
+/*
+ * ppc_md.setup_msi_irqs hook: allocate one virq per MSI descriptor of @dev
+ * and program the device with the MSIC address and the virq as MSI data.
+ *
+ * @nvec and @type are not used; every entry on dev->msi_list is set up.
+ *
+ * Returns 0 on success or a negative errno: -ENODEV when no MSIC serves the
+ * device, the error from setup_msi_msg_address(), or -ENOSPC when no virq
+ * could be allocated.
+ */
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct axon_msic *msic;
+	struct msi_desc *entry;
+	struct msi_msg msg;
+	unsigned int virq;
+	int rc;		/* signed: carries negative errno values */
+
+	msic = find_msi_translator(dev);
+	if (!msic)
+		return -ENODEV;
+
+	rc = setup_msi_msg_address(dev, &msg);
+	if (rc)
+		return rc;
+
+	list_for_each_entry(entry, &dev->msi_list, list) {
+		virq = irq_create_direct_mapping(msic->irq_domain);
+		if (virq == NO_IRQ) {
+			dev_warn(&dev->dev,
+				 "axon_msi: virq allocation failed!\n");
+			return -ENOSPC;
+		}
+		dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+		/* the MSIC FIFO reports the MSI data word, so use the virq */
+		irq_set_msi_desc(virq, entry);
+		msg.data = virq;
+		pci_write_msi_msg(virq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * ppc_md.teardown_msi_irqs hook: detach the MSI descriptor from, and dispose
+ * the mapping of, every virq that was set up for @dev.
+ */
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+
+	dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		/* descriptors that never got a virq have nothing to undo */
+		if (desc->irq != NO_IRQ) {
+			irq_set_msi_desc(desc->irq, NULL);
+			irq_dispose_mapping(desc->irq);
+		}
+	}
+}
+
+/*
+ * irq_chip installed on every MSI virq by msic_host_map(); masking is
+ * delegated to the generic PCI MSI helpers.
+ */
+static struct irq_chip msic_irq_chip = {
+ .irq_mask = pci_msi_mask_irq,
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_shutdown = pci_msi_mask_irq,
+ .name = "AXON-MSI",
+};
+
+/*
+ * irq_domain .map callback: tag @virq with the owning MSIC (the domain's
+ * host_data) as chip data and install the MSI irq_chip with the simple flow
+ * handler. axon_msi_cascade() relies on the chip data to validate FIFO
+ * entries. Always returns 0.
+ */
+static int msic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+ return 0;
+}
+
+/* Domain operations for the MSIC irq domain; only .map is provided. */
+static const struct irq_domain_ops msic_host_ops = {
+ .map = msic_host_map,
+};
+
+/*
+ * Platform driver .shutdown callback: clear the enable and interrupt-enable
+ * bits in the MSIC control register, leaving the other bits as they were.
+ */
+static void axon_msi_shutdown(struct platform_device *device)
+{
+	struct axon_msic *msic = dev_get_drvdata(&device->dev);
+	u32 ctrl;
+
+	pr_devel("axon_msi: disabling %s\n",
+		 msic->irq_domain->of_node->full_name);
+
+	ctrl = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+	ctrl &= ~(MSIC_CTRL_ENABLE | MSIC_CTRL_IRQ_ENABLE);
+	msic_dcr_write(msic, MSIC_CTRL_REG, ctrl);
+}
+
+/*
+ * Platform driver .probe callback for an "ibm,axon-msic" node.
+ *
+ * Maps the MSIC DCR range, allocates the FIFO buffer, hooks the cascade
+ * interrupt, creates the irq domain used for MSI virqs, enables the hardware
+ * and installs the ppc_md MSI hooks.
+ *
+ * Returns 0 on success or a negative errno. On failure everything acquired
+ * so far (virq mapping, FIFO, DCR mapping, msic) is released again.
+ */
+static int axon_msi_probe(struct platform_device *device)
+{
+	struct device_node *dn = device->dev.of_node;
+	struct axon_msic *msic;
+	unsigned int virq;
+	int dcr_base, dcr_len;
+	int rc;
+
+	pr_devel("axon_msi: setting up dn %s\n", dn->full_name);
+
+	msic = kzalloc(sizeof(*msic), GFP_KERNEL);
+	if (!msic) {
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
+		       dn->full_name);
+		return -ENOMEM;
+	}
+
+	dcr_base = dcr_resource_start(dn, 0);
+	dcr_len = dcr_resource_len(dn, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR
+		       "axon_msi: couldn't parse dcr properties on %s\n",
+		       dn->full_name);
+		rc = -EINVAL;
+		goto out_free_msic;
+	}
+
+	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(msic->dcr_host)) {
+		printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
+		       dn->full_name);
+		rc = -ENOMEM;
+		goto out_free_msic;
+	}
+
+	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+					     &msic->fifo_phys, GFP_KERNEL);
+	if (!msic->fifo_virt) {
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
+		       dn->full_name);
+		rc = -ENOMEM;
+		goto out_unmap_dcr;
+	}
+
+	virq = irq_of_parse_and_map(dn, 0);
+	if (virq == NO_IRQ) {
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
+		       dn->full_name);
+		rc = -EINVAL;
+		goto out_free_fifo;
+	}
+	/* 0xff filler makes unwritten entries fail the cascade's validity check */
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
+
+	/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
+	msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
+	if (!msic->irq_domain) {
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n",
+		       dn->full_name);
+		rc = -ENOMEM;
+		goto out_dispose_virq;
+	}
+
+	irq_set_handler_data(virq, msic);
+	irq_set_chained_handler(virq, axon_msi_cascade);
+	pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+	/* Enable the MSIC hardware */
+	msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+	msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+		       msic->fifo_phys & 0xFFFFFFFF);
+	msic_dcr_write(msic, MSIC_CTRL_REG,
+		       MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+		       MSIC_CTRL_FIFO_SIZE);
+
+	/* start consuming from wherever the hardware currently writes */
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;
+
+	dev_set_drvdata(&device->dev, msic);
+
+	ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
+	ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+
+	axon_msi_debug_setup(dn, msic);
+
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+
+	return 0;
+
+out_dispose_virq:
+	irq_dispose_mapping(virq);
+out_free_fifo:
+	dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+			  msic->fifo_phys);
+out_unmap_dcr:
+	dcr_unmap(msic->dcr_host, dcr_len);
+out_free_msic:
+	kfree(msic);
+
+	return rc;
+}
+
+/* Device tree match table: binds to nodes compatible with "ibm,axon-msic". */
+static const struct of_device_id axon_msi_device_id[] = {
+ {
+ .compatible = "ibm,axon-msic"
+ },
+ {}
+};
+
+/* Platform driver glue; there is no .remove, only probe and shutdown. */
+static struct platform_driver axon_msi_driver = {
+ .probe = axon_msi_probe,
+ .shutdown = axon_msi_shutdown,
+ .driver = {
+ .name = "axon-msi",
+ .of_match_table = axon_msi_device_id,
+ },
+};
+
+/* Register the platform driver; run as a subsys initcall. */
+static int __init axon_msi_init(void)
+{
+ return platform_driver_register(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+/*
+ * debugfs write handler: store @val (little-endian, 32-bit) to the MSIC
+ * trigger word mapped in axon_msi_debug_setup(). Always returns 0.
+ */
+static int msic_set(void *data, u64 val)
+{
+ struct axon_msic *msic = data;
+ out_le32(msic->trigger, val);
+ return 0;
+}
+
+/* debugfs read handler: nothing is read back from hardware, reports 0. */
+static int msic_get(void *data, u64 *val)
+{
+ *val = 0;
+ return 0;
+}
+
+/* File operations for the debugfs "msic_N" file, formatted as "%llu\n". */
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+/*
+ * DEBUG-only helper: map the first word of the node's "reg" region as the
+ * MSIC trigger and expose it through a debugfs file named "msic_<nid>".
+ *
+ * Failures are only traced; the driver keeps working without the debug file.
+ * If the debugfs file cannot be created the trigger mapping is released
+ * again, so msic->trigger is either NULL or backed by a usable file.
+ */
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+	const void *reg;
+	char name[16];
+	u64 addr;
+
+	/* do not hand a missing property to the address translator */
+	reg = of_get_property(dn, "reg", NULL);
+	if (!reg) {
+		pr_devel("axon_msi: couldn't translate reg property\n");
+		return;
+	}
+
+	addr = of_translate_address(dn, reg);
+	if (addr == OF_BAD_ADDR) {
+		pr_devel("axon_msi: couldn't translate reg property\n");
+		return;
+	}
+
+	msic->trigger = ioremap(addr, 0x4);
+	if (!msic->trigger) {
+		pr_devel("axon_msi: ioremap failed\n");
+		return;
+	}
+
+	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+	if (!debugfs_create_file(name, 0600, powerpc_debugfs_root,
+				 msic, &fops_msic)) {
+		pr_devel("axon_msi: debugfs_create_file failed!\n");
+		/* nothing can reach the mapping without the file */
+		iounmap(msic->trigger);
+		msic->trigger = NULL;
+		return;
+	}
+}
+#endif /* DEBUG */
diff --git a/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c b/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 000000000..2bb803130
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,118 @@
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+#include <asm/prom.h>
+
+/* Input device that reports KEY_POWER; set once registration succeeded. */
+static struct input_dev *button_dev;
+/* Platform device acting as the parent of the input device. */
+static struct platform_device *button_pdev;
+
+/*
+ * PMI callback: translate a power-button PMI message into a KEY_POWER press
+ * immediately followed by a release on the input device. Any other message
+ * type reaching this handler is treated as a bug.
+ */
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+ BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
+
+ /* press ... */
+ input_report_key(button_dev, KEY_POWER, 1);
+ input_sync(button_dev);
+ /* ... and release */
+ input_report_key(button_dev, KEY_POWER, 0);
+ input_sync(button_dev);
+}
+
+/* Handler descriptor registered with the PMI layer for power-button events. */
+static struct pmi_handler cbe_pmi_handler = {
+ .type = PMI_TYPE_POWER_BUTTON,
+ .handle_pmi_message = cbe_powerbutton_handle_pmi,
+};
+
+/*
+ * Module init: on an "IBM,CBPLUS-1.0" machine, create the power-button input
+ * device (parented to a "power_button" platform device) and register the PMI
+ * handler that feeds it.
+ *
+ * Returns 0 on success or a negative errno. An input device that was never
+ * registered is released with input_free_device(); once registration has
+ * succeeded it is torn down with input_unregister_device() instead, which
+ * also drops the allocation.
+ */
+static int __init cbe_powerbutton_init(void)
+{
+	int ret = 0;
+	struct input_dev *dev;
+
+	if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
+		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	dev = input_allocate_device();
+	if (!dev) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+		goto out;
+	}
+
+	set_bit(EV_KEY, dev->evbit);
+	set_bit(KEY_POWER, dev->keybit);
+
+	dev->name = "Power Button";
+	dev->id.bustype = BUS_HOST;
+
+	/* this makes the button look like an acpi power button
+	 * no clue whether anyone relies on that though */
+	dev->id.product = 0x02;
+	dev->phys = "LNXPWRBN/button/input0";
+
+	button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+	if (IS_ERR(button_pdev)) {
+		ret = PTR_ERR(button_pdev);
+		goto out_free_input;
+	}
+
+	dev->dev.parent = &button_pdev->dev;
+	ret = input_register_device(dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register device\n", __func__);
+		goto out_free_pdev;
+	}
+
+	/* must be set before the PMI handler can fire */
+	button_dev = dev;
+
+	ret = pmi_register_handler(&cbe_pmi_handler);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+		goto out_unregister_input;
+	}
+
+	return 0;
+
+out_unregister_input:
+	/* registered devices are unregistered, never freed directly */
+	button_dev = NULL;
+	input_unregister_device(dev);
+	platform_device_unregister(button_pdev);
+	return ret;
+
+out_free_pdev:
+	platform_device_unregister(button_pdev);
+out_free_input:
+	input_free_device(dev);
+out:
+	return ret;
+}
+
+/*
+ * Module exit: stop PMI delivery first, then remove the input device and
+ * finally its parent platform device. The input device was registered, so
+ * it is released through input_unregister_device() rather than
+ * input_free_device().
+ */
+static void __exit cbe_powerbutton_exit(void)
+{
+	pmi_unregister_handler(&cbe_pmi_handler);
+	input_unregister_device(button_dev);
+	platform_device_unregister(button_pdev);
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/kernel/arch/powerpc/platforms/cell/cbe_regs.c b/kernel/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 000000000..1428d583c
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,281 @@
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate notifier
+ * in order to release references to the cpu going away
+ */
+/*
+ * One entry per "cpu" device tree node found by cbe_regs_init(), holding the
+ * MMIO mappings set up by cbe_fill_regs_map(). cbe_regs_map_count is the
+ * number of entries in use.
+ */
+static struct cbe_regs_map
+{
+ /* the "cpu" node this entry was created for */
+ struct device_node *cpu_node;
+ /* matching "be" node, copied from the thread map (may be NULL) */
+ struct device_node *be_node;
+ /* register mappings; left NULL when the tree does not describe them */
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct cbe_iic_regs __iomem *iic_regs;
+ struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+ /* in-memory companion of pmd_regs, handed out by the shadow accessors */
+ struct cbe_pmd_shadow_regs pmd_shadow_regs;
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+/*
+ * Per-logical-cpu lookup table, indexed by cpu number and filled in by
+ * cbe_regs_init(), so the cpu-based accessors need no device tree walk.
+ */
+static struct cbe_thread_map
+{
+ /* device tree "cpu" node of this logical cpu */
+ struct device_node *cpu_node;
+ /* "be" node found for this cpu by cbe_get_be_node() */
+ struct device_node *be_node;
+ /* register map of the owning BE; NULL if none was assigned */
+ struct cbe_regs_map *regs;
+ /* thread id as reported by of_get_cpu_node() */
+ unsigned int thread_id;
+ /* index of the owning entry in cbe_regs_maps[] */
+ unsigned int cbe_id;
+} cbe_thread_map[NR_CPUS];
+
+/* cbe_local_mask[n]: logical cpus that belong to BE number n. */
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+/* logical cpus whose hardware thread id is 0 (one per BE core). */
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+/*
+ * Find the register map that belongs to device tree node @np.
+ *
+ * For anything but an "spe" node the table is searched for an entry whose
+ * cpu or be node is @np. For an "spe" node the result is looked up through
+ * the nearest "cpu" or "be" ancestor and cached in np->data.
+ *
+ * Returns the map, or NULL if none matches.
+ */
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+ int i;
+ struct device_node *tmp_np;
+
+ /* strcasecmp() != 0: not an spe node, so do the direct table search */
+ if (strcasecmp(np->type, "spe")) {
+ for (i = 0; i < cbe_regs_map_count; i++)
+ if (cbe_regs_maps[i].cpu_node == np ||
+ cbe_regs_maps[i].be_node == np)
+ return &cbe_regs_maps[i];
+ return NULL;
+ }
+
+ /* spe node: reuse the result of an earlier lookup if there is one */
+ if (np->data)
+ return np->data;
+
+ /* walk up path until cpu or be node was found */
+ tmp_np = np;
+ do {
+ tmp_np = tmp_np->parent;
+ /* on a correct devicetree we wont get up to root */
+ BUG_ON(!tmp_np);
+ } while (strcasecmp(tmp_np->type, "cpu") &&
+ strcasecmp(tmp_np->type, "be"));
+
+ /* the ancestor is not an spe node, so this recursion ends at once */
+ np->data = cbe_find_map(tmp_np);
+
+ return np->data;
+}
+
+/*
+ * Return the pervasive (PMD) register mapping of the BE that node @np
+ * belongs to, or NULL when no register map is known for it.
+ */
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs = cbe_find_map(np);
+
+	return regs ? regs->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+/*
+ * Return the pervasive (PMD) register mapping for logical cpu @cpu, or NULL
+ * when that cpu has no register map assigned.
+ */
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *regs = cbe_thread_map[cpu].regs;
+
+	return regs ? regs->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+/*
+ * Return the PMD shadow register block of the BE that node @np belongs to,
+ * or NULL when no register map is known for it.
+ */
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs = cbe_find_map(np);
+
+	return regs ? &regs->pmd_shadow_regs : NULL;
+}
+
+/*
+ * Return the PMD shadow register block for logical cpu @cpu, or NULL when
+ * that cpu has no register map assigned.
+ */
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *regs = cbe_thread_map[cpu].regs;
+
+	return regs ? &regs->pmd_shadow_regs : NULL;
+}
+
+/*
+ * Return the internal interrupt controller (IIC) register mapping of the BE
+ * that node @np belongs to, or NULL when no register map is known for it.
+ */
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs = cbe_find_map(np);
+
+	return regs ? regs->iic_regs : NULL;
+}
+
+/*
+ * Return the IIC register mapping for logical cpu @cpu, or NULL when that
+ * cpu has no register map assigned.
+ */
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *regs = cbe_thread_map[cpu].regs;
+
+	return regs ? regs->iic_regs : NULL;
+}
+
+/*
+ * Return the MIC-TM register mapping of the BE that node @np belongs to, or
+ * NULL when no register map is known for it.
+ */
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *regs = cbe_find_map(np);
+
+	return regs ? regs->mic_tm_regs : NULL;
+}
+
+/*
+ * Return the MIC-TM register mapping for logical cpu @cpu, or NULL when that
+ * cpu has no register map assigned.
+ */
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *regs = cbe_thread_map[cpu].regs;
+
+	return regs ? regs->mic_tm_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+/* Return the hardware thread id recorded for logical cpu @cpu. */
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+/* Return the BE index (cbe_id) that logical cpu @cpu was assigned to. */
+u32 cbe_cpu_to_node(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+/* Return the first logical cpu set in the cpu mask of BE number @node. */
+u32 cbe_node_to_cpu(int node)
+{
+	const cpumask_t *local = &cbe_local_mask[node];
+
+	return cpumask_first(local);
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+/*
+ * Find the "be" node whose "cpus" property references the device tree node
+ * of logical cpu @cpu_id.
+ *
+ * Returns the matching node (with the reference taken by the iterator still
+ * held for the caller), or NULL if no "be" node lists that cpu. A "be" node
+ * without a "cpus" property triggers a one-time warning and is returned
+ * as-is.
+ */
+static struct device_node *cbe_get_be_node(int cpu_id)
+{
+	/* look the cpu node up once instead of once per phandle */
+	struct device_node *cpu_np = of_get_cpu_node(cpu_id, NULL);
+	struct device_node *np;
+
+	for_each_node_by_type (np, "be") {
+		const phandle *cpu_handle;
+		int len, i, count;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		/*
+		 * the CAB SLOF tree is non compliant, so we just assume
+		 * there is only one node
+		 */
+		if (WARN_ON_ONCE(!cpu_handle))
+			goto out;
+
+		/* len is the property size in bytes, not a phandle count */
+		count = len / (int)sizeof(*cpu_handle);
+
+		for (i = 0; i < count; i++) {
+			struct device_node *ref;
+
+			ref = of_find_node_by_phandle(cpu_handle[i]);
+			/* only the address is compared, so drop the ref now */
+			of_node_put(ref);
+			if (ref == cpu_np)
+				goto out;
+		}
+	}
+	/* the iterator leaves np == NULL when it runs to completion */
+
+out:
+	of_node_put(cpu_np);
+	return np;
+}
+
+/*
+ * Tell whether @parent is the direct parent of @np, without keeping the
+ * reference that of_get_parent() takes.
+ */
+static bool __init cbe_node_has_parent(struct device_node *np,
+					struct device_node *parent)
+{
+	struct device_node *p = of_get_parent(np);
+
+	of_node_put(p);
+	return p == parent;
+}
+
+/*
+ * Set up the MMIO mappings of @map.
+ *
+ * When the map has a "be" node, the mappings come from that node's
+ * "pervasive", "CBEA-Internal-Interrupt-Controller" and "mic-tm" children.
+ * Otherwise they are taken from the legacy "pervasive", "iic" and "mic-tm"
+ * address properties of the cpu node. Mappings that are not described stay
+ * as they were (NULL for a fresh map).
+ */
+void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	if (map->be_node) {
+		struct device_node *be = map->be_node;
+		struct device_node *np;
+
+		for_each_node_by_type(np, "pervasive")
+			if (cbe_node_has_parent(np, be))
+				map->pmd_regs = of_iomap(np, 0);
+
+		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
+			if (cbe_node_has_parent(np, be))
+				map->iic_regs = of_iomap(np, 2);
+
+		for_each_node_by_type(np, "mic-tm")
+			if (cbe_node_has_parent(np, be))
+				map->mic_tm_regs = of_iomap(np, 0);
+	} else {
+		struct device_node *cpu;
+		/* That hack must die die die ! */
+		const struct address_prop {
+			unsigned long address;
+			unsigned int len;
+		} __attribute__((packed)) *prop;
+
+		cpu = map->cpu_node;
+
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop != NULL)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop != NULL)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop != NULL)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+	}
+}
+
+
+/*
+ * Build the cpu -> node/thread table and one register map per "cpu" device
+ * tree node, then map the registers of each.
+ *
+ * Stops early, with an error message, when the tree has more "cpu" nodes
+ * than MAX_CBE; the maps built up to that point stay valid.
+ */
+void __init cbe_regs_init(void)
+{
+	int i;
+	struct device_node *cpu;
+
+	/* Build local fast map of CPUs */
+	for_each_possible_cpu(i) {
+		/*
+		 * Start from a known value in case of_get_cpu_node() does
+		 * not fill the thread id in (e.g. no node for this cpu).
+		 */
+		unsigned int thread_id = 0;
+
+		cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+		cbe_thread_map[i].be_node = cbe_get_be_node(i);
+		cbe_thread_map[i].thread_id = thread_id;
+	}
+
+	/* Find maps for each device tree CPU */
+	for_each_node_by_type(cpu, "cpu") {
+		struct cbe_regs_map *map;
+		unsigned int cbe_id;
+
+		/* check for room before touching the table */
+		if (cbe_regs_map_count >= MAX_CBE) {
+			printk(KERN_ERR "cbe_regs: More BE chips than supported"
+			       "!\n");
+			/* leaving the iterator early: drop its reference */
+			of_node_put(cpu);
+			return;
+		}
+
+		cbe_id = cbe_regs_map_count++;
+		map = &cbe_regs_maps[cbe_id];
+		map->cpu_node = cpu;
+
+		for_each_possible_cpu(i) {
+			struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+			if (thread->cpu_node == cpu) {
+				thread->regs = map;
+				thread->cbe_id = cbe_id;
+				map->be_node = thread->be_node;
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
+				if (thread->thread_id == 0)
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
+			}
+		}
+
+		cbe_fill_regs_map(map);
+	}
+}
+
diff --git a/kernel/arch/powerpc/platforms/cell/cbe_thermal.c b/kernel/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 000000000..2c15ff094
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,399 @@
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ * contains the current temperature measured by the DTS
+ * throttle_begin:
+ * throttling begins when temperature is greater or equal to
+ * throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ * throttling is being ceased, if the temperature is lower than
+ * throttle_end. Due to a delay between applying throttling and
+ * a reduced temperature this value should be less than throttle_begin.
+ * A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ * If the temperature is greater or equal to throttle_full_stop,
+ * full throttling is applied to the cpu or spu. This value should be
+ * greater than throttle_begin and throttle_end. Setting this value to
+ * 65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+ /*
+ * Define a struct device_attribute named attr_<prefix>_<name>. The sysfs
+ * file is called <name>, has mode <mode>, and is wired to the handlers
+ * <prefix>_show_<name> and <prefix>_store_<name>, which must already be
+ * declared. The expansion ends in ';', so a trailing ';' at the use site
+ * only adds an empty declaration.
+ */
+ #define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \
+ struct device_attribute attr_ ## _prefix ## _ ## _name = { \
+ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _prefix ## _show_ ## _name, \
+ .store = _prefix ## _store_ ## _name, \
+ };
+
+ /*
+  * Convert a 6-bit register field into a temperature: each step is worth
+  * two degrees, counted upwards from TEMP_MIN.
+  */
+ static inline u8 reg_to_temp(u8 reg_value)
+ {
+ 	u8 steps = reg_value & 0x3f;
+
+ 	return (steps << 1) + TEMP_MIN;
+ }
+
+ /*
+  * Inverse of reg_to_temp(): turn a temperature into the 6-bit register
+  * encoding (two degrees per step above TEMP_MIN, odd values round down).
+  */
+ static inline u8 temp_to_reg(u8 temp)
+ {
+ 	int delta = temp - TEMP_MIN;
+
+ 	return (delta >> 1) & 0x3f;
+ }
+
+ /* Look up the pmd register block for the spu that embeds @dev. */
+ static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+ {
+ 	struct spu *spu = container_of(dev, struct spu, dev);
+
+ 	return cbe_get_pmd_regs(spu_devnode(spu));
+ }
+
+ /*
+  * Read the 64-bit register @reg and return the byte that belongs to the
+  * spu embedding @dev (selected by its spe_id).
+  */
+ static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+ {
+ 	struct spu *spu = container_of(dev, struct spu, dev);
+ 	union spe_reg snapshot;
+
+ 	snapshot.val = in_be64(&reg->val);
+
+ 	return snapshot.spe[spu->spe_id];
+ }
+
+ /* sysfs show handler: print the spu's current temperature from ts_ctsr1. */
+ static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+ 			char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *pmd_regs = get_pmd_regs(dev);
+ 	u8 raw = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
+
+ 	return sprintf(buf, "%d\n", reg_to_temp(raw));
+ }
+
+ /*
+  * Print the throttle threshold stored in tm_tpr at bit offset @pos as a
+  * temperature. Returns the number of characters written to @buf.
+  */
+ static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+ {
+ 	u64 field = in_be64(&pmd_regs->tm_tpr.val);
+
+ 	/* isolate the 6-bit threshold that starts at bit 'pos' */
+ 	field = (field >> pos) & 0x3F;
+
+ 	return sprintf(buf, "%d\n", reg_to_temp(field));
+ }
+
+ /*
+  * Parse a temperature from @buf and store its register encoding into the
+  * byte of tm_tpr that starts at bit offset @pos.
+  *
+  * Returns @size on success, -EINVAL if @buf holds no unsigned number or
+  * the value lies outside TEMP_MIN..TEMP_MAX.
+  */
+ static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+ {
+ 	unsigned int temp;
+ 	u64 field;
+ 	u64 tpr;
+
+ 	if (sscanf(buf, "%u", &temp) != 1)
+ 		return -EINVAL;
+ 	if (temp < TEMP_MIN || temp > TEMP_MAX)
+ 		return -EINVAL;
+
+ 	field = temp_to_reg(temp);
+
+ 	/* read-modify-write: clear the byte at 'pos', then insert the new value */
+ 	tpr = in_be64(&pmd_regs->tm_tpr.val);
+ 	tpr &= ~(0xffull << pos);
+ 	tpr |= field << pos;
+ 	out_be64(&pmd_regs->tm_tpr.val, tpr);
+
+ 	return size;
+ }
+
+ /* sysfs show handler: spu throttle_end threshold (tm_tpr bit offset 0). */
+ static ssize_t spu_show_throttle_end(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return show_throttle(regs, buf, 0);
+ }
+
+ /* sysfs show handler: spu throttle_begin threshold (tm_tpr bit offset 8). */
+ static ssize_t spu_show_throttle_begin(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return show_throttle(regs, buf, 8);
+ }
+
+ /* sysfs show handler: spu throttle_full_stop threshold (tm_tpr bit offset 16). */
+ static ssize_t spu_show_throttle_full_stop(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return show_throttle(regs, buf, 16);
+ }
+
+ /* sysfs store handler: spu throttle_end threshold (tm_tpr bit offset 0). */
+ static ssize_t spu_store_throttle_end(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return store_throttle(regs, buf, size, 0);
+ }
+
+ /* sysfs store handler: spu throttle_begin threshold (tm_tpr bit offset 8). */
+ static ssize_t spu_store_throttle_begin(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return store_throttle(regs, buf, size, 8);
+ }
+
+ /* sysfs store handler: spu throttle_full_stop threshold (tm_tpr bit offset 16). */
+ static ssize_t spu_store_throttle_full_stop(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+
+ 	return store_throttle(regs, buf, size, 16);
+ }
+
+ /*
+  * Print the temperature held in the 6-bit field of ts_ctsr2 that starts at
+  * bit offset @pos, for the cpu identified by dev->id.
+  */
+ static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
+ {
+ 	struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+ 	u64 raw = in_be64(&pmd_regs->ts_ctsr2);
+
+ 	raw = (raw >> pos) & 0x3f;
+
+ 	return sprintf(buf, "%d\n", reg_to_temp(raw));
+ }
+
+
+ /*
+  * sysfs show handler for the DTS on the PPE that is located near the
+  * linear thermal sensor (ts_ctsr2 bit offset 32).
+  */
+ static ssize_t ppe_show_temp0(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	const int dts0_pos = 32;
+
+ 	return ppe_show_temp(dev, buf, dts0_pos);
+ }
+
+ /* sysfs show handler for the second DTS on the PPE (ts_ctsr2 bit offset 0). */
+ static ssize_t ppe_show_temp1(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	const int dts1_pos = 0;
+
+ 	return ppe_show_temp(dev, buf, dts1_pos);
+ }
+
+ /* sysfs show handler: ppe throttle_end threshold (tm_tpr bit offset 32). */
+ static ssize_t ppe_show_throttle_end(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return show_throttle(regs, buf, 32);
+ }
+
+ /* sysfs show handler: ppe throttle_begin threshold (tm_tpr bit offset 40). */
+ static ssize_t ppe_show_throttle_begin(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return show_throttle(regs, buf, 40);
+ }
+
+ /* sysfs show handler: ppe throttle_full_stop threshold (tm_tpr bit offset 48). */
+ static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+ 		struct device_attribute *attr, char *buf)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return show_throttle(regs, buf, 48);
+ }
+
+ /* sysfs store handler: ppe throttle_end threshold (tm_tpr bit offset 32). */
+ static ssize_t ppe_store_throttle_end(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return store_throttle(regs, buf, size, 32);
+ }
+
+ /* sysfs store handler: ppe throttle_begin threshold (tm_tpr bit offset 40). */
+ static ssize_t ppe_store_throttle_begin(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return store_throttle(regs, buf, size, 40);
+ }
+
+ /* sysfs store handler: ppe throttle_full_stop threshold (tm_tpr bit offset 48). */
+ static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+ 		struct device_attribute *attr, const char *buf, size_t size)
+ {
+ 	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ 	return store_throttle(regs, buf, size, 48);
+ }
+
+
+ /* Read-only (0400) "temperature" attribute of an spu; no store handler. */
+ static struct device_attribute attr_spu_temperature = {
+ .attr = {.name = "temperature", .mode = 0400 },
+ .show = spu_show_temp,
+ };
+
+ /* Owner read/write (0600) throttle thresholds of an spu. */
+ static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+ static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+ static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+ /* NULL-terminated list of the spu attributes above. */
+ static struct attribute *spu_attributes[] = {
+ &attr_spu_temperature.attr,
+ &attr_spu_throttle_end.attr,
+ &attr_spu_throttle_begin.attr,
+ &attr_spu_throttle_full_stop.attr,
+ NULL,
+ };
+
+ /* The spu attributes are grouped in a sysfs directory named "thermal". */
+ static struct attribute_group spu_attribute_group = {
+ .name = "thermal",
+ .attrs = spu_attributes,
+ };
+
+ /* Read-only (0400) "temperature0" attribute of a cpu; no store handler. */
+ static struct device_attribute attr_ppe_temperature0 = {
+ .attr = {.name = "temperature0", .mode = 0400 },
+ .show = ppe_show_temp0,
+ };
+
+ /* Read-only (0400) "temperature1" attribute of a cpu; no store handler. */
+ static struct device_attribute attr_ppe_temperature1 = {
+ .attr = {.name = "temperature1", .mode = 0400 },
+ .show = ppe_show_temp1,
+ };
+
+ /* Owner read/write (0600) throttle thresholds of a cpu. */
+ static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+ static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+ static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+ /* NULL-terminated list of the cpu attributes above. */
+ static struct attribute *ppe_attributes[] = {
+ &attr_ppe_temperature0.attr,
+ &attr_ppe_temperature1.attr,
+ &attr_ppe_throttle_end.attr,
+ &attr_ppe_throttle_begin.attr,
+ &attr_ppe_throttle_full_stop.attr,
+ NULL,
+ };
+
+ /* The cpu attributes are grouped in a sysfs directory named "thermal". */
+ static struct attribute_group ppe_attribute_group = {
+ .name = "thermal",
+ .attrs = ppe_attributes,
+ };
+
+/*
+ * initialize throttling with default values
+ */
+static int __init init_default_values(void)
+{
+ int cpu;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct device *dev;
+ union ppe_spe_reg tpr;
+ union spe_reg str1;
+ u64 str2;
+ union spe_reg cr1;
+ u64 cr2;
+
+ /* TPR defaults */
+ /* ppe
+ * 1F - no full stop
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees */
+ tpr.ppe = 0x1F0803;
+ /* spe
+ * 10 - full stopped when over 96 degrees
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees
+ */
+ tpr.spe = 0x100803;
+
+ /* STR defaults */
+ /* str1
+ * 10 - stop 16 of 32 cycles
+ */
+ str1.val = 0x1010101010101010ull;
+ /* str2
+ * 10 - stop 16 of 32 cycles
+ */
+ str2 = 0x10;
+
+ /* CR defaults */
+ /* cr1
+ * 4 - normal operation
+ */
+ cr1.val = 0x0404040404040404ull;
+ /* cr2
+ * 4 - normal operation
+ */
+ cr2 = 0x04;
+
+ for_each_possible_cpu (cpu) {
+ pr_debug("processing cpu %d\n", cpu);
+ dev = get_cpu_device(cpu);
+
+ if (!dev) {
+ pr_info("invalid dev pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ if (!pmd_regs) {
+ pr_info("invalid CBE regs pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ out_be64(&pmd_regs->tm_str2, str2);
+ out_be64(&pmd_regs->tm_str1.val, str1.val);
+ out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+ out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+ out_be64(&pmd_regs->tm_cr2, cr2);
+ }
+
+ return 0;
+}
+
+
+ /*
+  * Module entry point: program the default throttling values and, only if
+  * that succeeded, add the "thermal" sysfs groups for spus and cpus.
+  * Returns the result of init_default_values().
+  */
+ static int __init thermal_init(void)
+ {
+ 	int rc;
+
+ 	rc = init_default_values();
+ 	if (rc != 0)
+ 		return rc;
+
+ 	spu_add_dev_attr_group(&spu_attribute_group);
+ 	cpu_add_dev_attr_group(&ppe_attribute_group);
+
+ 	return rc;
+ }
+ module_init(thermal_init);
+
+ /* Module exit: remove the "thermal" sysfs groups added by thermal_init(). */
+ static void __exit thermal_exit(void)
+ {
+ spu_remove_dev_attr_group(&spu_attribute_group);
+ cpu_remove_dev_attr_group(&ppe_attribute_group);
+ }
+ module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/kernel/arch/powerpc/platforms/cell/cell.h b/kernel/arch/powerpc/platforms/cell/cell.h
new file mode 100644
index 000000000..ef143dfee
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/cell.h
@@ -0,0 +1,24 @@
+/*
+ * Cell Platform common data structures
+ *
+ * Copyright 2015, Daniel Axtens, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 000000000..82607d621
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,171 @@
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME 100000 /* in µs */
+#define EXP 753 /* exp(-1) in fixed-point */
+
+ /* Per-cpu state of the spudemand governor. */
+ struct spu_gov_info_struct {
+ /* running average maintained with CALC_LOAD() in calc_freq() */
+ unsigned long busy_spus; /* fixed-point */
+ /* set for every cpu of the policy on GOV_START, NULL after GOV_STOP */
+ struct cpufreq_policy *policy;
+ /* self-rearming work item that runs spu_gov_work() */
+ struct delayed_work work;
+ /* delay between two runs of the work item */
+ unsigned int poll_int; /* µs */
+ };
+ static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+ /*
+  * Fold the number of busy spus on the policy cpu's node into the running
+  * average kept in @info and scale the policy's maximum frequency by it.
+  * Returns the resulting target frequency.
+  */
+ static int calc_freq(struct spu_gov_info_struct *info)
+ {
+ 	int cpu = info->policy->cpu;
+ 	int busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
+
+ 	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+ 	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+ 			cpu, busy_spus, info->busy_spus);
+
+ 	return info->policy->max * info->busy_spus / FIXED_1;
+ }
+
+ /*
+  * Periodic governor work: compute a new target frequency, hand it to the
+  * cpufreq driver and re-arm the work on the policy cpu after poll_int.
+  */
+ static void spu_gov_work(struct work_struct *work)
+ {
+ 	struct spu_gov_info_struct *info =
+ 		container_of(work, struct spu_gov_info_struct, work.work);
+ 	unsigned long target_freq;
+ 	int delay;
+
+ 	/* after cancel_delayed_work_sync we unset info->policy */
+ 	BUG_ON(info->policy == NULL);
+
+ 	target_freq = calc_freq(info);
+ 	__cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H);
+
+ 	delay = usecs_to_jiffies(info->poll_int);
+ 	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+ }
+
+ /* Set up the deferrable work item and schedule its first run on the policy cpu. */
+ static void spu_gov_init_work(struct spu_gov_info_struct *info)
+ {
+ 	int delay;
+
+ 	delay = usecs_to_jiffies(info->poll_int);
+
+ 	INIT_DEFERRABLE_WORK(&info->work, spu_gov_work);
+ 	schedule_delayed_work_on(info->policy->cpu, &info->work, delay);
+ }
+
+ /* Stop the periodic work item of @info via cancel_delayed_work_sync(). */
+ static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+ {
+ cancel_delayed_work_sync(&info->work);
+ }
+
+ /*
+  * Governor callback.
+  *
+  * CPUFREQ_GOV_START: attach @policy to the per-cpu state of every cpu in
+  * policy->cpus and start the polling work on policy->cpu. Fails with
+  * -EINVAL if that cpu is offline or policy->cur is zero.
+  * CPUFREQ_GOV_STOP: cancel the polling work and detach the policy again.
+  * Any other event is ignored and 0 is returned.
+  */
+ static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
+ {
+ 	unsigned int cpu = policy->cpu;
+ 	struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu);
+ 	int i;
+
+ 	switch (event) {
+ 	case CPUFREQ_GOV_START:
+ 		if (!cpu_online(cpu)) {
+ 			printk(KERN_ERR "cpu %d is not online\n", cpu);
+ 			return -EINVAL;
+ 		}
+
+ 		if (!policy->cur) {
+ 			printk(KERN_ERR "no cpu specified in policy\n");
+ 			return -EINVAL;
+ 		}
+
+ 		/* initialize spu_gov_info for all affected cpus */
+ 		for_each_cpu(i, policy->cpus)
+ 			per_cpu(spu_gov_info, i).policy = policy;
+
+ 		info->poll_int = POLL_TIME;
+
+ 		/* setup timer */
+ 		spu_gov_init_work(info);
+ 		break;
+
+ 	case CPUFREQ_GOV_STOP:
+ 		/* cancel timer */
+ 		spu_gov_cancel_work(info);
+
+ 		/* clean spu_gov_info for all affected cpus */
+ 		for_each_cpu(i, policy->cpus)
+ 			per_cpu(spu_gov_info, i).policy = NULL;
+ 		break;
+ 	}
+
+ 	return 0;
+ }
+
+ /* Governor descriptor registered under the name "spudemand". */
+ static struct cpufreq_governor spu_governor = {
+ .name = "spudemand",
+ .governor = spu_gov_govern,
+ .owner = THIS_MODULE,
+ };
+
+/*
+ * module init and destroy
+ */
+
+static int __init spu_gov_init(void)
+{
+ int ret;
+
+ ret = cpufreq_register_governor(&spu_governor);
+ if (ret)
+ printk(KERN_ERR "registration of governor failed\n");
+ return ret;
+}
+
+ /* Unregister the governor registered by spu_gov_init(). */
+ static void __exit spu_gov_exit(void)
+ {
+ cpufreq_unregister_governor(&spu_governor);
+ }
+
+
+module_init(spu_gov_init);
+module_exit(spu_gov_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.c b/kernel/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 000000000..3af8324c1
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,411 @@
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ * vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ * a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+ /* Per-cpu interrupt controller state, filled in by init_one_iic(). */
+ struct iic {
+ /* ioremap()ed per-thread register block */
+ struct cbe_iic_thread_regs __iomem *regs;
+ /* value returned by iic_get_target_id() */
+ u8 target_id;
+ /* priorities pushed by iic_get_irq() and written back by iic_eoi() */
+ u8 eoi_stack[16];
+ /* index of the most recently pushed eoi_stack[] entry */
+ int eoi_ptr;
+ /* device-tree node of the controller (reference held) */
+ struct device_node *node;
+ };
+
+ static DEFINE_PER_CPU(struct iic, cpu_iic);
+ /* NOTE(review): not referenced anywhere in the visible part of this file */
+ #define IIC_NODE_COUNT 2
+ /* the single irq domain shared by all IICs, created in iic_init_IRQ() */
+ static struct irq_domain *iic_host;
+
+ /*
+  * Convert the "pending" bits read from the IIC into a hw irq number.
+  * IPIs are encoded from the upper nibble of their priority; everything
+  * else is built from source node, class and source unit.
+  */
+ static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+ {
+ 	unsigned char node, class, unit;
+
+ 	/* Decode IPIs */
+ 	if (bits.flags & CBE_IIC_IRQ_IPI)
+ 		return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+
+ 	node = bits.source >> 4;
+ 	class = bits.class & 3;
+ 	unit = bits.source & 0xf;
+
+ 	return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit;
+ }
+
+ /* irq_mask callback: intentionally empty, nothing is masked here. */
+ static void iic_mask(struct irq_data *d)
+ {
+ }
+
+ /* irq_unmask callback: intentionally empty, counterpart of iic_mask(). */
+ static void iic_unmask(struct irq_data *d)
+ {
+ }
+
+ /*
+  * irq_eoi callback: pop the priority saved by iic_get_irq() for the
+  * interrupt being completed and write it back to this cpu's priority
+  * register.
+  */
+ static void iic_eoi(struct irq_data *d)
+ {
+ 	struct iic *iic = this_cpu_ptr(&cpu_iic);
+
+ 	/*
+ 	 * Validate the new stack index before it is used, so an unbalanced
+ 	 * EOI trips the BUG instead of first reading eoi_stack[-1].
+ 	 */
+ 	iic->eoi_ptr--;
+ 	BUG_ON(iic->eoi_ptr < 0);
+ 	out_be64(&iic->regs->prio, iic->eoi_stack[iic->eoi_ptr]);
+ }
+
+ /* irq_chip used for IPIs and normal IIC interrupts (see iic_host_map()). */
+ static struct irq_chip iic_chip = {
+ .name = "CELL-IIC",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_eoi,
+ };
+
+
+ /*
+ * irq_eoi callback for cascaded IO exception interrupts: intentionally
+ * empty; iic_ioexc_cascade() acks the status bits and EOIs the parent.
+ */
+ static void iic_ioexc_eoi(struct irq_data *d)
+ {
+ }
+
+ /*
+ * Chained handler for the IO exception cascade interrupt.
+ *
+ * Repeatedly reads the iic_is status register of the node (passed in as
+ * handler data by setup_iic()) until it reads zero. For every set bit the
+ * matching IOEXC interrupt is looked up in iic_host and handled. Bits in
+ * IIC_ISR_EDGE_MASK are acked before handling, all other bits afterwards.
+ * Finally the parent chip's irq_eoi is called for the cascade interrupt.
+ */
+ static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc)
+ {
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct cbe_iic_regs __iomem *node_iic =
+ (void __iomem *)irq_desc_get_handler_data(desc);
+ /* NOTE(review): base is derived from the Linux irq number - confirm it matches the hw node bits */
+ unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+ unsigned long bits, ack;
+ int cascade;
+
+ for (;;) {
+ bits = in_be64(&node_iic->iic_is);
+ if (bits == 0)
+ break;
+ /* pre-ack edge interrupts */
+ ack = bits & IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ /* handle them */
+ /* 'cascade' counts bit positions from the most significant bit (0) down to the least (63) */
+ for (cascade = 63; cascade >= 0; cascade--)
+ if (bits & (0x8000000000000000UL >> cascade)) {
+ unsigned int cirq =
+ irq_linear_revmap(iic_host,
+ base | cascade);
+ if (cirq != NO_IRQ)
+ generic_handle_irq(cirq);
+ }
+ /* post-ack level interrupts */
+ ack = bits & ~IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ }
+ chip->irq_eoi(&desc->irq_data);
+ }
+
+
+ /* irq_chip used for IO exception interrupts (see iic_host_map()). */
+ static struct irq_chip iic_ioexc_chip = {
+ .name = "CELL-IOEX",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_ioexc_eoi,
+ };
+
+ /*
+  * Get an IRQ number from the pending state register of the IIC.
+  *
+  * Returns NO_IRQ if nothing valid is pending or the hw number has no
+  * mapping in iic_host. Otherwise the priority of the interrupt is pushed
+  * on this cpu's EOI stack (popped again by iic_eoi()) and the Linux irq
+  * number is returned.
+  */
+ static unsigned int iic_get_irq(void)
+ {
+ 	struct cbe_iic_pending_bits pending;
+ 	struct iic *iic;
+ 	unsigned int virq;
+
+ 	iic = this_cpu_ptr(&cpu_iic);
+ 	*(unsigned long *) &pending =
+ 		in_be64((u64 __iomem *) &iic->regs->pending_destr);
+ 	if (!(pending.flags & CBE_IIC_IRQ_VALID))
+ 		return NO_IRQ;
+ 	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+ 	if (virq == NO_IRQ)
+ 		return NO_IRQ;
+
+ 	/*
+ 	 * eoi_stack[] has 16 entries: refuse to push once index 15 is in
+ 	 * use, before the store, so an overflow cannot write past the array.
+ 	 */
+ 	BUG_ON(iic->eoi_ptr >= 15);
+ 	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+ 	return virq;
+ }
+
+ /* Write 0xff to the priority register of the calling cpu's IIC. */
+ void iic_setup_cpu(void)
+ {
+ 	struct iic *iic = this_cpu_ptr(&cpu_iic);
+
+ 	out_be64(&iic->regs->prio, 0xff);
+ }
+
+ /* Return the target id computed for @cpu by init_one_iic(). */
+ u8 iic_get_target_id(int cpu)
+ {
+ 	struct iic *iic = &per_cpu(cpu_iic, cpu);
+
+ 	return iic->target_id;
+ }
+
+ EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+ /*
+  * Map an IPI message number to its hw irq number. Message 0 gets slot 0xf,
+  * so IPIs use the highest interrupt priorities.
+  */
+ static inline int iic_msg_to_irq(int msg)
+ {
+ 	int slot = 0xf - msg;
+
+ 	return IIC_IRQ_TYPE_IPI + slot;
+ }
+
+ /* Raise IPI @msg on @cpu by writing to that cpu's "generate" register. */
+ void iic_message_pass(int cpu, int msg)
+ {
+ 	struct iic *target = &per_cpu(cpu_iic, cpu);
+
+ 	out_be64(&target->regs->generate, (0xf - msg) << 4);
+ }
+
+ /* Return the shared IIC irq domain; @node is ignored. */
+ struct irq_domain *iic_get_irq_host(int node)
+ {
+ return iic_host;
+ }
+ EXPORT_SYMBOL_GPL(iic_get_irq_host);
+
+ /*
+  * Create the irq mapping for IPI @msg and request it. The mapping is
+  * disposed of again if smp_request_message_ipi() returns non-zero.
+  */
+ static void iic_request_ipi(int msg)
+ {
+ 	int virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
+
+ 	if (virq == NO_IRQ) {
+ 		printk(KERN_ERR
+ 		       "iic: failed to map IPI %s\n", smp_ipi_name[msg]);
+ 		return;
+ 	}
+
+ 	/*
+ 	 * If smp_request_message_ipi encounters an error it will notify
+ 	 * the error. If a message is not needed it will return non-zero.
+ 	 */
+ 	if (smp_request_message_ipi(virq, msg) != 0)
+ 		irq_dispose_mapping(virq);
+ }
+
+ /* Request the four IPI messages listed below, one after the other. */
+ void iic_request_IPIs(void)
+ {
+ iic_request_ipi(PPC_MSG_CALL_FUNCTION);
+ iic_request_ipi(PPC_MSG_RESCHEDULE);
+ iic_request_ipi(PPC_MSG_TICK_BROADCAST);
+ iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
+ }
+
+#endif /* CONFIG_SMP */
+
+
+ /* irq_domain match callback: accept device-tree nodes compatible with the IIC. */
+ static int iic_host_match(struct irq_domain *h, struct device_node *node)
+ {
+ 	int is_iic;
+
+ 	is_iic = of_device_is_compatible(node,
+ 				"IBM,CBEA-Internal-Interrupt-Controller");
+
+ 	return is_iic;
+ }
+
+ /*
+  * irq_domain map callback: pick chip and flow handler from the type bits
+  * of the hw number. IPIs are per-cpu interrupts on iic_chip, IO exceptions
+  * use iic_ioexc_chip, everything else is edge+eoi on iic_chip.
+  * Always returns 0.
+  */
+ static int iic_host_map(struct irq_domain *h, unsigned int virq,
+ 			irq_hw_number_t hw)
+ {
+ 	irq_hw_number_t type = hw & IIC_IRQ_TYPE_MASK;
+
+ 	if (type == IIC_IRQ_TYPE_IPI)
+ 		irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+ 	else if (type == IIC_IRQ_TYPE_IOEXC)
+ 		irq_set_chip_and_handler(virq, &iic_ioexc_chip,
+ 					 handle_edge_eoi_irq);
+ 	else
+ 		irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq);
+
+ 	return 0;
+ }
+
+ /*
+  * irq_domain xlate callback: translate a one-cell device-tree interrupt
+  * specifier, laid out as node:8 | ext:8 | class:8 | unit:8, into a hw irq
+  * number.
+  *
+  * Returns 0 on success, -ENODEV if @ct is not an IIC or does not use
+  * exactly one interrupt cell, -EINVAL if the node number is above 1.
+  */
+ static int iic_host_xlate(struct irq_domain *h, struct device_node *ct,
+ 			  const u32 *intspec, unsigned int intsize,
+ 			  irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+ {
+ 	unsigned int node, ext, unit, class;
+ 	const u32 *cells;
+ 	u32 spec;
+
+ 	if (!of_device_is_compatible(ct,
+ 				     "IBM,CBEA-Internal-Interrupt-Controller"))
+ 		return -ENODEV;
+ 	if (intsize != 1)
+ 		return -ENODEV;
+ 	cells = of_get_property(ct, "#interrupt-cells", NULL);
+ 	if (cells == NULL || *cells != 1)
+ 		return -ENODEV;
+
+ 	spec = intspec[0];
+ 	node = spec >> 24;
+ 	ext = (spec >> 16) & 0xff;
+ 	class = (spec >> 8) & 0xff;
+ 	unit = spec & 0xff;
+
+ 	/* Check if node is in supported range */
+ 	if (node > 1)
+ 		return -EINVAL;
+
+ 	/* Build up interrupt number, special case for IO exceptions */
+ 	*out_hwirq = (node << IIC_IRQ_NODE_SHIFT);
+ 	if (unit == IIC_UNIT_IIC && class == 1) {
+ 		*out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext;
+ 	} else {
+ 		*out_hwirq |= IIC_IRQ_TYPE_NORMAL |
+ 			(class << IIC_IRQ_CLASS_SHIFT) | unit;
+ 	}
+
+ 	/* Dummy flags, ignored by iic code */
+ 	*out_flags = IRQ_TYPE_EDGE_RISING;
+
+ 	return 0;
+ }
+
+ /* Domain callbacks handed to irq_domain_add_linear() in iic_init_IRQ(). */
+ static const struct irq_domain_ops iic_host_ops = {
+ .match = iic_host_match,
+ .map = iic_host_map,
+ .xlate = iic_host_xlate,
+ };
+
+ /*
+  * Initialise the per-cpu IIC state for hardware thread @hw_cpu: map the
+  * thread register block at @addr, compute the target id, seed the EOI
+  * stack, keep a reference on @node and write 0 to the priority register.
+  */
+ static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+ 				struct device_node *node)
+ {
+ 	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+ 	 * number properly. We are lucky for now
+ 	 */
+ 	struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
+ 	u8 thread_unit = (hw_cpu & 1) ? 0xf : 0xe;
+
+ 	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+ 	BUG_ON(iic->regs == NULL);
+
+ 	/* bit 1 of hw_cpu lands in bit 4, the low nibble selects the thread unit */
+ 	iic->target_id = ((hw_cpu & 2) << 3) | thread_unit;
+ 	iic->eoi_stack[0] = 0xff;
+ 	iic->node = of_node_get(node);
+ 	out_be64(&iic->regs->prio, 0);
+
+ 	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n",
+ 	       hw_cpu, iic->target_id, node->full_name);
+ }
+
+ /*
+ * Discover every IIC in the device tree, initialise the per-thread state
+ * of its two hardware threads and set up the IO exception cascade for its
+ * node.
+ *
+ * Returns 0 if at least one IIC was found, -ENODEV if none was found or if
+ * one of them lacks its cpu association or register addresses.
+ */
+ static int __init setup_iic(void)
+ {
+ struct device_node *dn;
+ struct resource r0, r1;
+ unsigned int node, cascade, found = 0;
+ struct cbe_iic_regs __iomem *node_iic;
+ const u32 *np;
+
+ for (dn = NULL;
+ (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+ if (!of_device_is_compatible(dn,
+ "IBM,CBEA-Internal-Interrupt-Controller"))
+ continue;
+ /* NOTE(review): np[0] and np[1] are read below without checking the property length */
+ np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+ if (np == NULL) {
+ printk(KERN_WARNING "IIC: CPU association not found\n");
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ if (of_address_to_resource(dn, 0, &r0) ||
+ of_address_to_resource(dn, 1, &r1)) {
+ printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ found++;
+ init_one_iic(np[0], r0.start, dn);
+ init_one_iic(np[1], r1.start, dn);
+
+ /* Setup cascade for IO exceptions. XXX cleanup tricks to get
+ * node vs CPU etc...
+ * Note that we configure the IIC_IRR here with a hard coded
+ * priority of 1. We might want to improve that later.
+ */
+ node = np[0] >> 1;
+ /* NOTE(review): node_iic is used below without a NULL check */
+ node_iic = cbe_get_cpu_iic_regs(np[0]);
+ cascade = node << IIC_IRQ_NODE_SHIFT;
+ cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+ cascade |= IIC_UNIT_IIC;
+ cascade = irq_create_mapping(iic_host, cascade);
+ if (cascade == NO_IRQ)
+ continue;
+ /*
+ * The handler data is a generic pointer that gets passed back
+ * to us later, so the forced cast is fine.
+ */
+ irq_set_handler_data(cascade, (void __force *)node_iic);
+ irq_set_chained_handler(cascade, iic_ioexc_cascade);
+ out_be64(&node_iic->iic_ir,
+ (1 << 12) /* priority */ |
+ (node << 4) /* dest node */ |
+ IIC_UNIT_THREAD_0 /* route them to thread 0 */);
+ /* Flush pending (make sure it triggers if there is
+ * anything pending)
+ */
+ out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+ }
+
+ if (found)
+ return 0;
+ else
+ return -ENODEV;
+ }
+
+ /*
+  * Platform interrupt init: create the linear irq domain, make it the
+  * default host, discover the IICs (panics if that fails), install the
+  * get_irq hook and enable the controller on the current cpu.
+  */
+ void __init iic_init_IRQ(void)
+ {
+ 	/* Linear domain sized for IIC_SOURCE_COUNT hw interrupt numbers */
+ 	iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT,
+ 					 &iic_host_ops, NULL);
+ 	BUG_ON(!iic_host);
+ 	irq_set_default_host(iic_host);
+
+ 	/* Discover and initialize iics */
+ 	if (setup_iic() < 0)
+ 		panic("IIC: Failed to initialize !\n");
+
+ 	/* Set master interrupt handling function */
+ 	ppc_md.get_irq = iic_get_irq;
+
+ 	/* Enable on current CPU */
+ 	iic_setup_cpu();
+ }
+
+ /*
+  * Program the iic_ir routing register reached through @cpu so that the
+  * next interrupt goes, with @priority, to node (cpu >> 1) and to thread 0
+  * if @thread is 0, thread 1 otherwise.
+  */
+ void iic_set_interrupt_routing(int cpu, int thread, int priority)
+ {
+ 	struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu);
+ 	int node = cpu >> 1;
+ 	u64 iic_ir = 0;
+
+ 	/* Set which node and thread will handle the next interrupt */
+ 	iic_ir |= CBE_IIC_IR_PRIO(priority) |
+ 		  CBE_IIC_IR_DEST_NODE(node);
+
+ 	if (thread != 0)
+ 		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+ 	else
+ 		iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+
+ 	out_be64(&iic_regs->iic_ir, iic_ir);
+ }
diff --git a/kernel/arch/powerpc/platforms/cell/interrupt.h b/kernel/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 000000000..4f60ae6ca
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,89 @@
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represent the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ * defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ * and node is always 0 (IPIs are per-cpu, their source is
+ * not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is, the node field is expected to never extend to more than 23 bits)
+ *
+ */
+
+ /* Constants describing the hw irq number layout and the IIC units. */
+ enum {
+ IIC_IRQ_INVALID = 0x80000000u,
+ /* node number lives in bit 8 of the hw irq number */
+ IIC_IRQ_NODE_MASK = 0x100,
+ IIC_IRQ_NODE_SHIFT = 8,
+ IIC_IRQ_MAX = 0x1ff,
+ /* the two type bits of the bottom byte */
+ IIC_IRQ_TYPE_MASK = 0xc0,
+ IIC_IRQ_TYPE_NORMAL = 0x00,
+ IIC_IRQ_TYPE_IOEXC = 0x40,
+ IIC_IRQ_TYPE_IPI = 0x80,
+ /* class field of a normal interrupt, already shifted into place */
+ IIC_IRQ_CLASS_SHIFT = 4,
+ IIC_IRQ_CLASS_0 = 0x00,
+ IIC_IRQ_CLASS_1 = 0x10,
+ IIC_IRQ_CLASS_2 = 0x20,
+ /* number of hw irq numbers, i.e. IIC_IRQ_MAX + 1 */
+ IIC_SOURCE_COUNT = 0x200,
+
+ /* Here are defined the various source/dest units. Avoid using those
+ * definitions if you can, they are mostly here for reference
+ */
+ IIC_UNIT_SPU_0 = 0x4,
+ IIC_UNIT_SPU_1 = 0x7,
+ IIC_UNIT_SPU_2 = 0x3,
+ IIC_UNIT_SPU_3 = 0x8,
+ IIC_UNIT_SPU_4 = 0x2,
+ IIC_UNIT_SPU_5 = 0x9,
+ IIC_UNIT_SPU_6 = 0x1,
+ IIC_UNIT_SPU_7 = 0xa,
+ IIC_UNIT_IOC_0 = 0x0,
+ IIC_UNIT_IOC_1 = 0xb,
+ /* 0xe is shared: THREAD_0 as a target, IIC as a source */
+ IIC_UNIT_THREAD_0 = 0xe, /* target only */
+ IIC_UNIT_THREAD_1 = 0xf, /* target only */
+ IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */
+
+ /* Base numbers for the external interrupts */
+ IIC_IRQ_EXT_IOIF0 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+ IIC_IRQ_EXT_IOIF1 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+ /* Base numbers for the IIC_ISR interrupts */
+ /* the values 59..63 already have bit 0x10 set, so OR-ing in IIC_IRQ_CLASS_1 does not change them */
+ IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+ IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+ IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+ IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+ IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+ /* Which bits in IIC_ISR are edge sensitive */
+ IIC_ISR_EDGE_MASK = 0x4ul,
+ };
+
+extern void iic_init_IRQ(void);
+extern void iic_message_pass(int cpu, int msg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/kernel/arch/powerpc/platforms/cell/iommu.c b/kernel/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 000000000..21b502398
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1237 @@
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "cell.h"
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap unused pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled.
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS 2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size 0x2000
+
+#define IOC_IOPT_CacheInvd 0x908
+#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul
+
+#define IOC_IOST_Origin 0x918
+#define IOC_IOST_Origin_E 0x8000000000000000ul
+#define IOC_IOST_Origin_HW 0x0000000000000800ul
+#define IOC_IOST_Origin_HL 0x0000000000000400ul
+
+#define IOC_IO_ExcpStat 0x920
+#define IOC_IO_ExcpStat_V 0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful
+
+#define IOC_IO_ExcpMask 0x928
+#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul
+
+#define IOC_IOCmd_Offset 0x1000
+
+#define IOC_IOCmd_Cfg 0xc00
+#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V 0x8000000000000000ul /* valid */
+#define IOSTE_H 0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */
+#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */
+#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */
+#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT 28
+#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET 0x80000000ul
+
+struct iommu_window {
+ struct list_head list; /* entry in the owning cbe_iommu's windows list */
+ struct cbe_iommu *iommu; /* IOMMU this window belongs to */
+ unsigned long offset; /* window base, as passed to cell_iommu_setup_window() */
+ unsigned long size; /* window length in bytes */
+ unsigned int ioid; /* value of the "ioid" property, merged into each IOPTE */
+ struct iommu_table table; /* generic table; container_of() gets back to the window */
+};
+
+#define NAMESIZE 8 /* size of cbe_iommu.name */
+struct cbe_iommu {
+ int nid; /* node id, from of_node_to_nid() */
+ char name[NAMESIZE]; /* "iommu%d", used as the interrupt name */
+ void __iomem *xlate_regs; /* ioremap()ed IOC register block */
+ void __iomem *cmd_regs; /* xlate_regs + IOC_IOCmd_Offset */
+ unsigned long *stab; /* segment table */
+ unsigned long *ptab; /* page table backing the dynamic (4K page) window */
+ void *pad_page; /* zeroed page mapped at the first entry of a window at offset 0 */
+ struct list_head windows; /* list of struct iommu_window */
+};
+
+/* Static array of iommus, one per node.
+ * Each contains a list of windows, keyed from the dma_window property:
+ * - on bus setup, look for a matching window, or create one
+ * - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus; /* number of iommus[] slots handed out by cell_iommu_alloc() */
+
+/*
+ * Flush the IOC's cached copies of @n_ptes IO page table entries, starting
+ * at @pte.
+ *
+ * Each request written to IOC_IOPT_CacheInvd carries an entry count in the
+ * NE field (the bits under IOC_IOPT_CacheInvd_NE_Mask), the real address of
+ * the first IOPTE and the Busy bit; the register is then polled until Busy
+ * reads back clear. Larger ranges are split into several requests.
+ */
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+		long n_ptes)
+{
+	/* largest count the 11-bit NE field can encode */
+	const long max_ne = IOC_IOPT_CacheInvd_NE_Mask >> 53;
+	u64 __iomem *reg;
+	u64 val;
+	long n;
+
+	reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+
+	while (n_ptes > 0) {
+		/*
+		 * The count is written as-is (not count - 1), so each request
+		 * is capped at max_ne: a count of 1 << 11 would be shifted
+		 * out of the NE field and end up as a request for zero
+		 * entries.
+		 */
+		n = min(n_ptes, max_ne);
+		val = (((u64)n << 53) & IOC_IOPT_CacheInvd_NE_Mask)
+			| (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask)
+			| IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(reg, val);
+		/* wait for the hardware to finish this request */
+		while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+
+		n_ptes -= n;
+		pte += n;
+	}
+}
+
+/*
+ * Fill in @npages IOPTEs starting at table index @index so that they map
+ * consecutive IOMMU pages beginning at @uaddr (translated with __pa()), then
+ * flush the IOC's cached copies of those entries. Always returns 0.
+ */
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		struct dma_attrs *attrs)
+{
+	struct iommu_window *win =
+		container_of(tbl, struct iommu_window, table);
+	unsigned long *first_pte, pte_flags;
+	int n;
+
+	/* Proper protection causes problems with the spidernet driver, so
+	 * per-direction protection is a compile-time choice; without it every
+	 * mapping allows both read and write.
+	 */
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+	/* To avoid referencing a global, a trick is used to set up the
+	 * protection bits: "prot" is three 4-bit fields appended together,
+	 * one for each of the 3 supported direction values. It is shifted
+	 * left so that the field matching the requested direction lands on
+	 * the PP bits, and all other bits are masked out.
+	 */
+	const unsigned long prot = 0xc48;
+	pte_flags = (prot << (52 + 4 * direction)) &
+		(CBE_IOPTE_PP_W | CBE_IOPTE_PP_R);
+	pte_flags |= CBE_IOPTE_M | CBE_IOPTE_SO_RW;
+	pte_flags |= win->ioid & CBE_IOPTE_IOID_Mask;
+#else
+	pte_flags = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R;
+	pte_flags |= CBE_IOPTE_M | CBE_IOPTE_SO_RW;
+	pte_flags |= win->ioid & CBE_IOPTE_IOID_Mask;
+#endif
+	/* weakly ordered mappings drop the strong-ordering bits */
+	if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)))
+		pte_flags &= ~CBE_IOPTE_SO_RW;
+
+	first_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	n = 0;
+	while (n < npages) {
+		first_pte[n] = pte_flags | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
+		uaddr += (1 << tbl->it_page_shift);
+		n++;
+	}
+
+	/* the entries must be written before the cache flush is requested */
+	mb();
+
+	invalidate_tce_cache(win->iommu, first_pte, npages);
+
+	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+		 index, npages, direction, pte_flags);
+	return 0;
+}
+
+/*
+ * Reset @npages IOPTEs starting at table index @index and flush the IOC's
+ * cached copies of them.
+ */
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+	struct iommu_window *win =
+		container_of(tbl, struct iommu_window, table);
+	unsigned long *first_pte, fill;
+	int n;
+
+	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+	/* really unmap: an all-zero entry */
+	fill = 0;
+#else
+	/* spider bridge does PCI reads after freeing - point the entry at
+	 * a scratch page instead of leaving it invalid */
+	fill = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		__pa(win->iommu->pad_page) |
+		(win->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+
+	first_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	n = 0;
+	while (n < npages)
+		first_pte[n++] = fill;
+
+	/* the entries must be written before the cache flush is requested */
+	mb();
+
+	invalidate_tce_cache(win->iommu, first_pte, npages);
+}
+
+/*
+ * Interrupt handler for IOC exceptions: report the contents of the
+ * exception status register, then write it back with the V bit cleared.
+ * @data is the struct cbe_iommu registered with request_irq().
+ */
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+	struct cbe_iommu *iommu = data;
+	void __iomem *stat_reg = iommu->xlate_regs + IOC_IO_ExcpStat;
+	unsigned long status, fault;
+	char seg_flag = ' ', page_flag = ' ';
+
+	status = in_be64(stat_reg);
+	fault = status & IOC_IO_ExcpStat_SPF_Mask;
+
+	if (fault == IOC_IO_ExcpStat_SPF_S)
+		seg_flag = 'S';
+	if (fault == IOC_IO_ExcpStat_SPF_P)
+		page_flag = 'P';
+
+	/* Might want to rate limit it */
+	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", status);
+	printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+	       !!(status & IOC_IO_ExcpStat_V),
+	       seg_flag,
+	       page_flag,
+	       (status & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write",
+	       (unsigned int)(status & IOC_IO_ExcpStat_IOID_Mask));
+	printk(KERN_ERR " page=0x%016lx\n",
+	       status & IOC_IO_ExcpStat_ADDR_Mask);
+
+	/* clear interrupt */
+	status &= ~IOC_IO_ExcpStat_V;
+	out_be64(stat_reg, status);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the address of the IOC register block for node @nid and store it
+ * in @base. Returns 0 on success; otherwise returns -ENODEV with @base set
+ * to 0.
+ */
+static int cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+	struct device_node *node;
+	struct resource res;
+
+	*base = 0;
+
+	/* First look for new style /be nodes */
+	for_each_node_by_name(node, "ioc") {
+		if (of_node_to_nid(node) != nid)
+			continue;
+		if (of_address_to_resource(node, 0, &res) == 0) {
+			*base = res.start;
+			/* leaving the iterator early: drop its reference */
+			of_node_put(node);
+			return 0;
+		}
+		printk(KERN_ERR "iommu: can't get address for %s\n",
+		       node->full_name);
+	}
+
+	/* Ok, let's try the old way: a property on the node's cpu */
+	for_each_node_by_type(node, "cpu") {
+		const unsigned int *node_id;
+		const unsigned long *xlate;
+
+		node_id = of_get_property(node, "node-id", NULL);
+		if (!node_id || *node_id != nid)
+			continue;
+
+		xlate = of_get_property(node, "ioc-translation", NULL);
+		if (!xlate)
+			continue;
+
+		*base = *xlate;
+		of_node_put(node);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Allocate and zero the segment table of @iommu. The table gets one entry
+ * per segment up to the higher end of the two regions (@dbase + @dsize and
+ * @fbase + @fsize). Allocation failure is a BUG.
+ */
+static void cell_iommu_setup_stab(struct cbe_iommu *iommu,
+		unsigned long dbase, unsigned long dsize,
+		unsigned long fbase, unsigned long fsize)
+{
+	unsigned long top = max(dbase + dsize, fbase + fsize);
+	unsigned long nr_segments = top >> IO_SEGMENT_SHIFT;
+	unsigned long bytes = nr_segments * sizeof(unsigned long);
+	struct page *pg;
+
+	pr_debug("%s: iommu[%d]: segments: %lu\n",
+		 __func__, iommu->nid, nr_segments);
+
+	/* set up the segment table */
+	pg = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(bytes));
+	BUG_ON(!pg);
+	iommu->stab = page_address(pg);
+	memset(iommu->stab, 0, bytes);
+}
+
+/*
+ * Allocate a zeroed IO page table covering @size bytes of bus address space
+ * starting at @base, using pages of 1 << @page_shift bytes, and point the
+ * matching segment table entries of @iommu at it. Segments that fall inside
+ * [@gap_base, @gap_base + @gap_size) keep their current segment table entry.
+ *
+ * Returns the page table. Allocation failure and an unsupported @page_shift
+ * are BUGs.
+ */
+static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+		unsigned long base, unsigned long size, unsigned long gap_base,
+		unsigned long gap_size, unsigned long page_shift)
+{
+	const unsigned long first_seg = base >> IO_SEGMENT_SHIFT;
+	const unsigned long nr_segs = size >> IO_SEGMENT_SHIFT;
+	unsigned long ptes_per_seg, bytes, seg_pt_pages, ste, *table;
+	unsigned long gap_first, gap_segs;
+	struct page *pg;
+	int seg;
+
+	ptes_per_seg = 1ull << IO_PAGENO_BITS(page_shift);
+	/* PTEs for each segment must start on a 4K boundary */
+	ptes_per_seg = max(ptes_per_seg,
+			   (1 << 12) / sizeof(unsigned long));
+
+	bytes = nr_segs * ptes_per_seg * sizeof(unsigned long);
+	pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+		 iommu->nid, bytes, get_order(bytes));
+	pg = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(bytes));
+	BUG_ON(!pg);
+
+	table = page_address(pg);
+	memset(table, 0, bytes);
+
+	/* number of 4K pages needed for a page table */
+	seg_pt_pages = (ptes_per_seg * sizeof(unsigned long)) >> 12;
+
+	pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+		 __func__, iommu->nid, iommu->stab, table,
+		 seg_pt_pages);
+
+	/* initialise the STEs */
+	ste = IOSTE_V | ((seg_pt_pages - 1) << 5);
+
+	switch (page_shift) {
+	case 12:
+		ste |= IOSTE_PS_4K;
+		break;
+	case 16:
+		ste |= IOSTE_PS_64K;
+		break;
+	case 20:
+		ste |= IOSTE_PS_1M;
+		break;
+	case 24:
+		ste |= IOSTE_PS_16M;
+		break;
+	default:
+		BUG();
+	}
+
+	/* express the gap in segments, like the loop index below */
+	gap_first = gap_base >> IO_SEGMENT_SHIFT;
+	gap_segs = gap_size >> IO_SEGMENT_SHIFT;
+
+	pr_debug("Setting up IOMMU stab:\n");
+	for (seg = first_seg; seg < (first_seg + nr_segs); seg++) {
+		if (seg >= gap_first && seg < (gap_first + gap_segs)) {
+			pr_debug("\toverlap at %d, skipping\n", seg);
+			continue;
+		}
+		/* each segment owns seg_pt_pages 4K pages of the table */
+		iommu->stab[seg] = ste | (__pa(table) + (seg_pt_pages << 12) *
+					  (seg - first_seg));
+		pr_debug("\t[%d] 0x%016lx\n", seg, iommu->stab[seg]);
+	}
+
+	return table;
+}
+
+/*
+ * Map the IOC registers of @iommu's node, hook up the IO exception
+ * interrupt, load the segment table origin and switch IO translation on.
+ *
+ * The segment table (iommu->stab) must already be populated. A missing IOC
+ * register address or a failed register mapping panics; a failed interrupt
+ * mapping or request is a BUG.
+ */
+static void cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+	int ret;
+	unsigned long reg, xlate_base;
+	unsigned int virq;
+
+	if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+		panic("%s: missing IOC register mappings for node %d\n",
+		      __func__, iommu->nid);
+
+	iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+	/* every access below goes through this mapping, so fail loudly */
+	if (!iommu->xlate_regs)
+		panic("%s: failed to map IOC registers for node %d\n",
+		      __func__, iommu->nid);
+	iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+	/* ensure that the STEs have updated */
+	mb();
+
+	/* setup interrupts for the iommu: clear the V bit of the exception
+	 * status, then enable page and segment fault exceptions */
+	reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+		 reg & ~IOC_IO_ExcpStat_V);
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+		 IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+	virq = irq_create_mapping(NULL,
+			IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+	BUG_ON(virq == NO_IRQ);
+
+	ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
+	BUG_ON(ret);
+
+	/* set the IOC segment table origin register (and turn on the iommu) */
+	reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+	out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+	/* read the register back after writing it */
+	in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+	/* turn on IO translation */
+	reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+	out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+/*
+ * Bring up @iommu with a single dynamic window of 4K pages covering
+ * [@base, @base + @size): segment table, page table, then the hardware.
+ */
+static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+	unsigned long base, unsigned long size)
+{
+	unsigned long *dyn_ptab;
+
+	/* no fixed region and no gap in this configuration */
+	cell_iommu_setup_stab(iommu, base, size, 0, 0);
+
+	dyn_ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+					 IOMMU_PAGE_SHIFT_4K);
+	iommu->ptab = dyn_ptab;
+
+	cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now: compiled out */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+ unsigned long offset, unsigned long size)
+{
+ struct iommu_window *window;
+
+ /* todo: check for overlapping (but not equal) windows */
+
+ list_for_each_entry(window, &(iommu->windows), list) {
+ if (window->offset == offset && window->size == size)
+ return window;
+ }
+
+ return NULL;
+}
+#endif
+
+/*
+ * Return the value of the "ioid" property of @np, or 0 (with a warning)
+ * when the node has no such property.
+ */
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+	const u32 *prop = of_get_property(np, "ioid", NULL);
+
+	if (prop != NULL)
+		return *prop;
+
+	printk(KERN_WARNING "iommu: missing ioid for %s using 0\n",
+	       np->full_name);
+	return 0;
+}
+
+/*
+ * Create the iommu_window (and its iommu_table) describing the dynamic
+ * window [@offset, @offset + @size) of @iommu and add it to the IOMMU's
+ * window list. @pte_offset is added to the table's it_offset. The ioid
+ * comes from @np.
+ *
+ * For a window starting at 0, the first IOMMU page is additionally reserved
+ * and mapped to a freshly allocated, zeroed pad page. Allocation failures
+ * are BUGs.
+ */
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+			unsigned long offset, unsigned long size,
+			unsigned long pte_offset)
+{
+	struct iommu_window *win;
+	struct iommu_table *tbl;
+	struct page *pad;
+	u32 ioid;
+
+	ioid = cell_iommu_get_ioid(np);
+
+	win = kzalloc_node(sizeof(*win), GFP_KERNEL, iommu->nid);
+	BUG_ON(win == NULL);
+
+	win->iommu = iommu;
+	win->ioid = ioid;
+	win->offset = offset;
+	win->size = size;
+
+	tbl = &win->table;
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_blocksize = 16;
+	tbl->it_base = (unsigned long)iommu->ptab;
+	tbl->it_index = iommu->nid;
+	tbl->it_offset = (offset >> tbl->it_page_shift) + pte_offset;
+	tbl->it_size = size >> tbl->it_page_shift;
+
+	iommu_init_table(tbl, iommu->nid);
+
+	pr_debug("\tioid %d\n", win->ioid);
+	pr_debug("\tblocksize %ld\n", tbl->it_blocksize);
+	pr_debug("\tbase 0x%016lx\n", tbl->it_base);
+	pr_debug("\toffset 0x%lx\n", tbl->it_offset);
+	pr_debug("\tsize %ld\n", tbl->it_size);
+
+	list_add(&win->list, &iommu->windows);
+
+	if (offset == 0) {
+		/* We need to map and reserve the first IOMMU page since it's
+		 * used by the spider workaround. In theory, we only need to
+		 * do that when running on spider but it doesn't really
+		 * matter.
+		 *
+		 * This code also assumes that we have a window that starts
+		 * at 0, which is the case on all spider based blades.
+		 */
+		pad = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+		BUG_ON(!pad);
+		iommu->pad_page = page_address(pad);
+		clear_page(iommu->pad_page);
+
+		/* mark entry 0 as in use, then point it at the pad page */
+		__set_bit(0, tbl->it_map);
+		tce_build_cell(tbl, tbl->it_offset, 1,
+			       (unsigned long)iommu->pad_page,
+			       DMA_TO_DEVICE, NULL);
+	}
+
+	return win;
+}
+
+/* Return the allocated iommu whose node id is @nid, or NULL if none is. */
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+	struct cbe_iommu *cur;
+
+	/* only the first cbe_nr_iommus slots have been set up */
+	for (cur = iommus; cur < iommus + cbe_nr_iommus; cur++) {
+		if (cur->nid == nid)
+			return cur;
+	}
+
+	return NULL;
+}
+
+static unsigned long cell_dma_direct_offset; /* DMA offset given to devices when dma_direct_ops is in use */
+
+static unsigned long dma_iommu_fixed_base; /* start of the fixed mapping, set by cell_iommu_setup_fixed_ptab() */
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */
+static int iommu_fixed_is_weak;
+
+/*
+ * Return the iommu_table to use for @dev, or NULL (after logging an error)
+ * when the device's node has no iommu or that iommu has no window.
+ */
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+	struct cbe_iommu *iommu = cell_iommu_for_node(dev_to_node(dev));
+	struct iommu_window *first;
+
+	/* Current implementation uses the first window available in that
+	 * node's iommu. We -might- do something smarter later though it may
+	 * never be necessary
+	 */
+	if (iommu != NULL && !list_empty(&iommu->windows)) {
+		first = list_first_entry(&iommu->windows,
+					 struct iommu_window, list);
+		return &first->table;
+	}
+
+	dev_err(dev, "iommu: missing iommu for %s (node %d)\n",
+		of_node_full_name(dev->of_node), dev_to_node(dev));
+	return NULL;
+}
+
+/*
+ * A coherent allocation implies strong ordering: when the fixed mapping is
+ * weakly ordered the allocation goes through the iommu table instead of the
+ * direct ops.
+ */
+static void *dma_fixed_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag,
+				      struct dma_attrs *attrs)
+{
+	if (!iommu_fixed_is_weak)
+		return dma_direct_ops.alloc(dev, size, dma_handle, flag,
+					    attrs);
+
+	return iommu_alloc_coherent(dev, cell_get_iommu_table(dev),
+				    size, dma_handle,
+				    device_to_mask(dev), flag,
+				    dev_to_node(dev));
+}
+
+/*
+ * Free a coherent allocation through the same path that
+ * dma_fixed_alloc_coherent() chose: the iommu table when the fixed mapping
+ * is weakly ordered, the direct ops otherwise.
+ */
+static void dma_fixed_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle,
+				    struct dma_attrs *attrs)
+{
+	if (!iommu_fixed_is_weak) {
+		dma_direct_ops.free(dev, size, vaddr, dma_handle, attrs);
+		return;
+	}
+
+	iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr,
+			    dma_handle);
+}
+
+/*
+ * Map one page. The direct ops are used when the ordering requested in
+ * @attrs matches the ordering of the fixed mapping; otherwise the page is
+ * mapped through the iommu table.
+ */
+static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
+{
+	int weak_wanted = dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+
+	if (iommu_fixed_is_weak != weak_wanted)
+		return iommu_map_page(dev, cell_get_iommu_table(dev), page,
+				      offset, size, device_to_mask(dev),
+				      direction, attrs);
+
+	return dma_direct_ops.map_page(dev, page, offset, size,
+				       direction, attrs);
+}
+
+/*
+ * Unmap one page, using the same ordering test as dma_fixed_map_page() to
+ * pick between the direct ops and the iommu table.
+ */
+static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 struct dma_attrs *attrs)
+{
+	int weak_wanted = dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+
+	if (iommu_fixed_is_weak != weak_wanted) {
+		iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size,
+				 direction, attrs);
+		return;
+	}
+
+	dma_direct_ops.unmap_page(dev, dma_addr, size, direction, attrs);
+}
+
+/*
+ * Map a scatterlist. The direct ops are used when the ordering requested in
+ * @attrs matches the ordering of the fixed mapping; otherwise the list is
+ * mapped through the iommu table.
+ */
+static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg,
+			    int nents, enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
+{
+	int weak_wanted = dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+
+	if (iommu_fixed_is_weak != weak_wanted)
+		return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg,
+					nents, device_to_mask(dev),
+					direction, attrs);
+
+	return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs);
+}
+
+/*
+ * Unmap a scatterlist, using the same ordering test as dma_fixed_map_sg()
+ * to pick between the direct ops and the iommu table.
+ */
+static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
+			       int nents, enum dma_data_direction direction,
+			       struct dma_attrs *attrs)
+{
+	int weak_wanted = dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+
+	if (iommu_fixed_is_weak != weak_wanted) {
+		ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents,
+				   direction, attrs);
+		return;
+	}
+
+	dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs);
+}
+
+/* The fixed ops only accept a full 64-bit DMA mask: 1 if so, 0 otherwise. */
+static int dma_fixed_dma_supported(struct device *dev, u64 mask)
+{
+	if (mask != DMA_BIT_MASK(64))
+		return 0;
+
+	return 1;
+}
+
+/* defined further down, needed by the ops table */
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
+
+/* DMA ops installed on devices that use the fixed mapping */
+struct dma_map_ops dma_iommu_fixed_ops = {
+	/* coherent memory */
+	.alloc		= dma_fixed_alloc_coherent,
+	.free		= dma_fixed_free_coherent,
+	/* streaming mappings */
+	.map_page	= dma_fixed_map_page,
+	.unmap_page	= dma_fixed_unmap_page,
+	.map_sg		= dma_fixed_map_sg,
+	.unmap_sg	= dma_fixed_unmap_sg,
+	/* mask handling */
+	.dma_supported	= dma_fixed_dma_supported,
+	.set_dma_mask	= dma_set_mask_and_switch,
+};
+
+/* defined further down */
+static void cell_dma_dev_setup_fixed(struct device *dev);
+
+/*
+ * Give @dev the per-device DMA state that matches the ops in use: the fixed
+ * offset, the iommu table, or the direct offset. Any other combination of
+ * ops is a BUG.
+ */
+static void cell_dma_dev_setup(struct device *dev)
+{
+	/* Order is important here, these are not mutually exclusive */
+	if (get_dma_ops(dev) == &dma_iommu_fixed_ops) {
+		cell_dma_dev_setup_fixed(dev);
+		return;
+	}
+
+	if (get_pci_dma_ops() == &dma_iommu_ops) {
+		set_iommu_table_base(dev, cell_get_iommu_table(dev));
+		return;
+	}
+
+	if (get_pci_dma_ops() == &dma_direct_ops) {
+		set_dma_offset(dev, cell_dma_direct_offset);
+		return;
+	}
+
+	BUG();
+}
+
+/* PCI flavour of cell_dma_dev_setup(): operates on the embedded device. */
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+	struct device *generic = &dev->dev;
+
+	cell_dma_dev_setup(generic);
+}
+
+/*
+ * Bus notifier callback: when a device is added, give it the PCI DMA ops
+ * and set up its DMA state. Always returns 0.
+ */
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+			      void *data)
+{
+	struct device *dev = data;
+
+	/* We are only interested in device addition */
+	if (action == BUS_NOTIFY_ADD_DEVICE) {
+		/* We use the PCI DMA ops */
+		dev->archdata.dma_ops = get_pci_dma_ops();
+
+		cell_dma_dev_setup(dev);
+	}
+
+	return 0;
+}
+
+/* registered on the platform bus by cell_iommu_init() */
+static struct notifier_block cell_of_bus_notifier = {
+	.notifier_call	= cell_of_bus_notify,
+};
+
+/*
+ * Fetch the DMA window of @np into @base and @size.
+ *
+ * Returns 0 when the node has an "ibm,dma-window" property. Without one,
+ * a hard coded window (base 0, size 0x80000000) is stored and -ENODEV is
+ * returned.
+ */
+static int __init cell_iommu_get_window(struct device_node *np,
+					unsigned long *base,
+					unsigned long *size)
+{
+	unsigned long unused_index;
+	const __be32 *prop;
+
+	/* Use ibm,dma-window if available, else, hard code ! */
+	prop = of_get_property(np, "ibm,dma-window", NULL);
+	if (prop != NULL) {
+		of_parse_dma_window(np, prop, &unused_index, base, size);
+		return 0;
+	}
+
+	*base = 0;
+	*size = 0x80000000u;
+	return -ENODEV;
+}
+
+/*
+ * Hand out the next free slot of iommus[] for the node that @np belongs to
+ * and initialise its base fields. Returns NULL (after logging an error)
+ * when the node id cannot be determined or all slots are taken.
+ */
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+	struct cbe_iommu *slot;
+	int idx;
+	int nid = of_node_to_nid(np);
+
+	/* Get node ID */
+	if (nid < 0) {
+		printk(KERN_ERR "iommu: failed to get node for %s\n",
+		       np->full_name);
+		return NULL;
+	}
+	pr_debug("iommu: setting up iommu for node %d (%s)\n",
+		 nid, np->full_name);
+
+	/* XXX todo: If we can have multiple windows on the same IOMMU, which
+	 * isn't the case today, we probably want here to check whether the
+	 * iommu for that node is already setup.
+	 * However, there might be issue with getting the size right so let's
+	 * ignore that for now. We might want to completely get rid of the
+	 * multiple window support since the cell iommu supports per-page ioids
+	 */
+
+	if (cbe_nr_iommus >= NR_IOMMUS) {
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n",
+		       np->full_name);
+		return NULL;
+	}
+
+	/* Init base fields */
+	idx = cbe_nr_iommus;
+	cbe_nr_iommus++;
+
+	slot = &iommus[idx];
+	slot->nid = nid;
+	slot->stab = NULL;
+	INIT_LIST_HEAD(&slot->windows);
+	snprintf(slot->name, sizeof(slot->name), "iommu%d", idx);
+
+	return slot;
+}
+
+/*
+ * Set up one iommu for the bus node @np with a single dynamic window taken
+ * from the node's DMA window. @offset (in bytes) is converted to 4K pages
+ * and passed on as the table's extra PTE offset. Does nothing if no iommu
+ * slot could be allocated.
+ */
+static void __init cell_iommu_init_one(struct device_node *np,
+				       unsigned long offset)
+{
+	unsigned long win_base, win_size;
+	struct cbe_iommu *iommu = cell_iommu_alloc(np);
+
+	if (iommu == NULL)
+		return;
+
+	/* Obtain a window for it */
+	cell_iommu_get_window(np, &win_base, &win_size);
+
+	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+		 win_base, win_base + win_size - 1);
+
+	/* Initialize the hardware */
+	cell_iommu_setup_hardware(iommu, win_base, win_size);
+
+	/* Setup the iommu_table */
+	cell_iommu_setup_window(iommu, np, win_base, win_size,
+				offset >> IOMMU_PAGE_SHIFT_4K);
+}
+
+/*
+ * Switch IO translation off on every online node whose IOC registers can be
+ * found and mapped; nodes without them are skipped.
+ */
+static void __init cell_disable_iommus(void)
+{
+	void __iomem *xlate, *cmd;
+	unsigned long ioc_base, cfg;
+	int nid;
+
+	/* Make sure IOC translation is disabled on all nodes */
+	for_each_online_node(nid) {
+		if (cell_iommu_find_ioc(nid, &ioc_base) != 0)
+			continue;
+
+		xlate = ioremap(ioc_base, IOC_Reg_Size);
+		if (!xlate)
+			continue;
+		cmd = xlate + IOC_IOCmd_Offset;
+
+		pr_debug("iommu: cleaning up iommu on node %d\n", nid);
+
+		/* clear the segment table origin, reading it back after */
+		out_be64(xlate + IOC_IOST_Origin, 0);
+		(void)in_be64(xlate + IOC_IOST_Origin);
+
+		/* clear the translation enable bit, reading it back after */
+		cfg = in_be64(cmd + IOC_IOCmd_Cfg);
+		cfg &= ~IOC_IOCmd_Cfg_TE;
+		out_be64(cmd + IOC_IOCmd_Cfg, cfg);
+		(void)in_be64(cmd + IOC_IOCmd_Cfg);
+
+		iounmap(xlate);
+	}
+}
+
+/*
+ * Set the machine up for direct DMA with the IOMMUs switched off.
+ *
+ * Returns 0 when direct DMA is in place. Returns -ENODEV when a DMA window
+ * was found but is smaller than the end of DRAM, in which case the caller
+ * has to enable the IOMMU after all.
+ */
+static int __init cell_iommu_init_disabled(void)
+{
+	struct device_node *np = NULL;
+	unsigned long base = 0, size = 0;
+	int have_window;
+
+	/* When no iommu is present, we use direct DMA ops */
+	set_pci_dma_ops(&dma_direct_ops);
+
+	/* First make sure all IOC translation is turned off */
+	cell_disable_iommus();
+
+	/* If we have no Axon, we set up the spider DMA magic offset.
+	 * The lookup takes a reference on the node it returns, so drop it
+	 * again once the test is done.
+	 */
+	np = of_find_node_by_name(NULL, "axon");
+	if (np == NULL)
+		cell_dma_direct_offset = SPIDER_DMA_OFFSET;
+	of_node_put(np);
+
+	/* Now we need to check to see where the memory is mapped
+	 * in PCI space. We assume that all busses use the same dma
+	 * window which is always the case so far on Cell, thus we
+	 * pick up the first pci-internal node we can find and check
+	 * the DMA window from there.
+	 */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent == NULL || np->parent->parent != NULL)
+			continue;
+		if (cell_iommu_get_window(np, &base, &size) == 0)
+			break;
+	}
+	if (np == NULL) {
+		for_each_node_by_name(np, "pci-internal") {
+			if (np->parent == NULL || np->parent->parent != NULL)
+				continue;
+			if (cell_iommu_get_window(np, &base, &size) == 0)
+				break;
+		}
+	}
+	/* np is non-NULL only if a loop was left early with a window */
+	have_window = (np != NULL);
+	of_node_put(np);
+
+	/* If we found a DMA window, we check if it's big enough to enclose
+	 * all of physical memory. If not, we force enable IOMMU
+	 */
+	if (have_window && size < memblock_end_of_DRAM()) {
+		printk(KERN_WARNING "iommu: force-enabled, dma window"
+		       " (%ldMB) smaller than total memory (%lldMB)\n",
+		       size >> 20, memblock_end_of_DRAM() >> 20);
+		return -ENODEV;
+	}
+
+	cell_dma_direct_offset += base;
+
+	/* devices only need per-device setup when there is an offset */
+	if (cell_dma_direct_offset != 0)
+		cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+	printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+	       cell_dma_direct_offset);
+
+	return 0;
+}
+
+/*
+ * Fixed IOMMU mapping support
+ *
+ * This code adds support for setting up a fixed IOMMU mapping on certain
+ * cell machines. For 64-bit devices this avoids the performance overhead of
+ * mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ * the fixed mapping.
+ *
+ * The fixed mapping is established at boot, and maps all of physical memory
+ * 1:1 into device space at some offset. On machines with < 30 GB of memory
+ * we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ * For example a machine with 4GB of memory would end up with the normal
+ * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ * 3GB, plus any offset required by firmware. The firmware offset is encoded
+ * in the "dma-ranges" property.
+ *
+ * On machines with 30GB or more of memory, we are unable to place the fixed
+ * mapping above the normal IOMMU window as we would run out of address space.
+ * Instead we move the normal IOMMU window to coincide with the hash page
+ * table; this region does not need to be part of the fixed mapping as no
+ * device should ever be DMA'ing to it. We then set up the fixed mapping
+ * from 0 to 32GB.
+ */
+
+/*
+ * Find the device-side address at which CPU address 0 is visible to @dev.
+ *
+ * Walks up from the device's node to the nearest ancestor with a non-empty
+ * "dma-ranges" property and, among the ranges that translate to CPU address
+ * 0, picks the largest one. Returns its child (device) address, or
+ * OF_BAD_ADDR when the device has no node, no ancestor has a usable
+ * "dma-ranges", or no range translates to 0.
+ */
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+	u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR;
+	struct device_node *np;
+	const u32 *ranges = NULL;
+	int i, len, best, naddr, nsize, pna, range_size;
+
+	np = of_node_get(dev->of_node);
+	if (!np) {
+		/* nothing to walk; the cell-count helpers need a node */
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	while (1) {
+		naddr = of_n_addr_cells(np);
+		nsize = of_n_size_cells(np);
+		np = of_get_next_parent(np);
+		if (!np)
+			break;
+
+		ranges = of_get_property(np, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+	}
+
+	/* Running off the top of the tree means nothing usable was found,
+	 * even if the last ancestor looked at had an empty property and left
+	 * a non-NULL pointer in "ranges".
+	 */
+	if (!np || !ranges) {
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(np);
+	range_size = naddr + nsize + pna;
+
+	/* dma-ranges format:
+	 * child addr : naddr cells
+	 * parent addr : pna cells
+	 * size : nsize cells
+	 *
+	 * Only complete entries are looked at; a truncated tail is ignored.
+	 */
+	for (i = 0, best = -1, best_size = 0; i + range_size <= len;
+	     i += range_size) {
+		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+		size = of_read_number(ranges + i + naddr + pna, nsize);
+
+		if (cpu_addr == 0 && size > best_size) {
+			best = i;
+			best_size = size;
+		}
+	}
+
+	if (best >= 0)
+		dev_addr = of_read_number(ranges + best, naddr);
+	else
+		dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+	of_node_put(np);
+
+	return dev_addr;
+}
+
+/*
+ * set_dma_mask hook: besides recording @dma_mask, switch @dev to the fixed
+ * ops when it asks for a 64-bit mask and has a fixed address, and to the
+ * default PCI ops otherwise. Returns -EIO if the device has no dma_mask or
+ * the mask is not supported, 0 on success.
+ */
+static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
+{
+	int use_fixed;
+
+	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+		return -EIO;
+
+	/* the device tree is only consulted for 64-bit masks */
+	use_fixed = dma_mask == DMA_BIT_MASK(64) &&
+		cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR;
+
+	if (!use_fixed) {
+		dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+		set_dma_ops(dev, get_pci_dma_ops());
+	} else {
+		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		set_dma_ops(dev, &dma_iommu_fixed_ops);
+	}
+
+	/* redo the per-device setup for the ops just chosen */
+	cell_dma_dev_setup(dev);
+
+	*dev->dma_mask = dma_mask;
+
+	return 0;
+}
+
+/*
+ * Set the DMA offset of @dev for the fixed mapping: the device's fixed
+ * address from the device tree plus the base of the fixed window.
+ */
+static void cell_dma_dev_setup_fixed(struct device *dev)
+{
+	u64 fixed = cell_iommu_get_fixed_address(dev);
+
+	fixed += dma_iommu_fixed_base;
+	set_dma_offset(dev, fixed);
+
+	dev_dbg(dev, "iommu: fixed addr = %llx\n", fixed);
+}
+
+/*
+ * Write the 16MB-page IOPTE for @addr into @ptab: @base_pte combined with
+ * the real page number of @addr. Every segment owns one 4K page worth of
+ * entries in @ptab.
+ */
+static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
+			   unsigned long base_pte)
+{
+	unsigned long seg = addr >> IO_SEGMENT_SHIFT;
+	/* index of the 16MB page within its segment */
+	unsigned long slot = (addr >> 24) - (seg << IO_PAGENO_BITS(24));
+	unsigned long *seg_ptab =
+		ptab + (seg * (1 << 12) / sizeof(unsigned long));
+
+	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+		 addr, seg_ptab, seg, slot);
+
+	seg_ptab[slot] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
+}
+
+/*
+ * Build the page table of the fixed mapping for @iommu: 16MB pages covering
+ * [@fbase, @fbase + @fsize), skipping whatever overlaps the dynamic window
+ * [@dbase, @dbase + @dsize). Also records @fbase as the fixed mapping base.
+ * The ioid comes from @np.
+ */
+static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	unsigned long *fixed_ptab, pte_flags, off, bus_addr;
+
+	fixed_ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize,
+					   24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	pte_flags = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M;
+	pte_flags |= cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask;
+
+	if (!iommu_fixed_is_weak) {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		pte_flags |= CBE_IOPTE_SO_RW;
+	} else {
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	}
+
+	/* one entry per 16MB of the fixed window */
+	for (off = 0; off < fsize; off += (1 << 24)) {
+		bus_addr = off + fbase;
+
+		/* Don't touch the dynamic region */
+		if (bus_addr < dbase || bus_addr >= (dbase + dsize))
+			insert_16M_pte(off, fixed_ptab, pte_flags);
+		else
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+	}
+
+	mb();
+}
+
+/*
+ * Try to set up the fixed mapping next to the dynamic window on every axon
+ * node.
+ *
+ * Returns 0 when the fixed mapping is in place; the IOMMUs are then fully
+ * initialised and dma_iommu_ops (with dma_set_mask_and_switch) is installed
+ * as the PCI DMA ops. Returns -1 when the fixed mapping cannot be used: no
+ * axon node, no dma-ranges property, no hash table, or a hash window that
+ * is not segment aligned or does not fit in the DMA window. No iommu has
+ * been allocated when -1 is returned.
+ */
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	of_node_put(np);
+
+	/* np is only compared against NULL from here on */
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	np = of_find_node_with_property(NULL, "dma-ranges");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shenanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = memblock_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000ul)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits, drivers never need to DMA to it, we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+			return -1;
+		}
+		hbase = __pa(htab_address);
+		hend = hbase + htab_size_bytes;
+
+		/* The window must start and end on a segment boundary */
+		if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+		    (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) {
+			pr_debug("iommu: hash window not segment aligned\n");
+			return -1;
+		}
+
+		/* Check the hash window fits inside the real DMA window */
+		for_each_node_by_name(np, "axon") {
+			cell_iommu_get_window(np, &dbase, &dsize);
+
+			if (hbase < dbase || (hend > (dbase + dsize))) {
+				pr_debug("iommu: hash window doesn't fit in real DMA window\n");
+				/* leaving the iterator early: drop its
+				 * reference on the current node */
+				of_node_put(np);
+				return -1;
+			}
+		}
+
+		fbase = 0;
+	}
+
+	/* Setup the dynamic regions */
+	for_each_node_by_name(np, "axon") {
+		iommu = cell_iommu_alloc(np);
+		BUG_ON(!iommu);
+
+		if (hbase == 0)
+			cell_iommu_get_window(np, &dbase, &dsize);
+		else {
+			dbase = hbase;
+			dsize = htab_size_bytes;
+		}
+
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+		       "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+		       dbase + dsize, fbase, fbase + fsize);
+
+		/* the dynamic page table is set up first, then the fixed one
+		 * is told to leave the dynamic segments alone */
+		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+						    IOMMU_PAGE_SHIFT_4K);
+		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+					    fbase, fsize);
+		cell_iommu_enable_hardware(iommu);
+		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+	}
+
+	dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
+	set_pci_dma_ops(&dma_iommu_ops);
+
+	return 0;
+}
+
+static int iommu_fixed_disabled; /* set if booted with iommu_fixed=off */
+
+/*
+ * Parse the "iommu_fixed=" boot option: "off" disables the fixed mapping,
+ * "weak" and "strong" select its ordering. Always returns 1.
+ */
+static int __init setup_iommu_fixed(char *str)
+{
+	struct device_node *endpoint;
+
+	if (!strcmp(str, "off"))
+		iommu_fixed_disabled = 1;
+
+	/* If we can find a pcie-endpoint in the device tree assume that
+	 * we're on a triblade or a CAB so by default the fixed mapping
+	 * should be set to be weakly ordered; but only if the boot
+	 * option WASN'T set for strong ordering
+	 */
+	endpoint = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	if (!strcmp(str, "weak"))
+		iommu_fixed_is_weak = 1;
+	else if (endpoint && strcmp(str, "strong"))
+		iommu_fixed_is_weak = 1;
+
+	of_node_put(endpoint);
+
+	return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+/*
+ * Report the DMA mask @dev needs: 0 without a dma_mask, a full 64-bit mask
+ * when the fixed mapping is usable for the device, otherwise whatever the
+ * device's DMA ops report (64-bit, with a one-time warning, if the ops have
+ * no get_required_mask hook).
+ */
+static u64 cell_dma_get_required_mask(struct device *dev)
+{
+	struct dma_map_ops *ops;
+
+	if (!dev->dma_mask)
+		return 0;
+
+	if (!iommu_fixed_disabled &&
+	    cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+		return DMA_BIT_MASK(64);
+
+	ops = get_dma_ops(dev);
+	if (!ops->get_required_mask) {
+		WARN_ONCE(1, "no get_required_mask in %p ops", ops);
+		return DMA_BIT_MASK(64);
+	}
+
+	return ops->get_required_mask(dev);
+}
+
+/*
+ * Arch initcall for Cell machines: choose between direct DMA, the fixed
+ * mapping and the plain dynamic IOMMU setup, then register the platform bus
+ * notifier. Always returns 0.
+ */
+static int __init cell_iommu_init(void)
+{
+	struct device_node *node;
+	int try_direct;
+
+	/* If IOMMU is disabled or we have little enough RAM to not need
+	 * to enable it, we setup a direct mapping.
+	 *
+	 * Note: should we make sure we have the IOMMU actually disabled ?
+	 */
+	try_direct = iommu_is_off ||
+		(!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull);
+	if (try_direct && cell_iommu_init_disabled() == 0)
+		goto bail;
+
+	/* Setup various callbacks */
+	cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+	ppc_md.dma_get_required_mask = cell_dma_get_required_mask;
+	ppc_md.tce_build = tce_build_cell;
+	ppc_md.tce_free = tce_free_cell;
+
+	/* the fixed mapping, when usable, does the whole IOMMU setup */
+	if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+		goto bail;
+
+	/* Create an iommu for each /axon node. */
+	for_each_node_by_name(node, "axon") {
+		if (node->parent != NULL && node->parent->parent == NULL)
+			cell_iommu_init_one(node, 0);
+	}
+
+	/* Create an iommu for each toplevel /pci-internal node for
+	 * old hardware/firmware
+	 */
+	for_each_node_by_name(node, "pci-internal") {
+		if (node->parent != NULL && node->parent->parent == NULL)
+			cell_iommu_init_one(node, SPIDER_DMA_OFFSET);
+	}
+
+	/* Setup default PCI iommu ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+
+ bail:
+	/* Register callbacks on OF platform device addition/removal
+	 * to handle linking them to the right DMA operations
+	 */
+	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
+
+	return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
diff --git a/kernel/arch/powerpc/platforms/cell/pervasive.c b/kernel/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 000000000..d17e98bc0
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,133 @@
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * Michael N. Day (mnday@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+
+#include "pervasive.h"
+
+/*
+ * Idle routine installed as ppc_md.power_save by cbe_pervasive_init().
+ *
+ * Enables external and decrementer wake-up requests for the current
+ * thread in TSC_CELL, drops to low thread priority, then clears the
+ * runlatch and thread-enable bits in CTRL so the thread stops executing.
+ * Returns immediately, without touching any register, if
+ * prep_irq_for_idle() reports that idle must not be entered.
+ */
+static void cbe_power_save(void)
+{
+ unsigned long ctrl, thread_switch_control;
+
+ /* Ensure our interrupt state is properly tracked */
+ if (!prep_irq_for_idle())
+ return;
+
+ ctrl = mfspr(SPRN_CTRLF);
+
+ /* Enable DEC and EE interrupt request */
+ thread_switch_control = mfspr(SPRN_TSC_CELL);
+ thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+ /* Pick the decrementer-enable bit matching the current thread (CT field) */
+ switch (ctrl & CTRL_CT) {
+ case CTRL_CT0:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+ break;
+ case CTRL_CT1:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+ break;
+ default:
+ printk(KERN_WARNING "%s: unknown configuration\n",
+ __func__);
+ break;
+ }
+ mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+ /*
+ * go into low thread priority, medium priority will be
+ * restored for us after wake-up.
+ */
+ HMT_low();
+
+ /*
+ * atomically disable thread execution and runlatch.
+ * External and Decrementer exceptions are still handled when the
+ * thread is disabled but now enter in cbe_system_reset_exception()
+ */
+ ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+ mtspr(SPRN_CTRLT, ctrl);
+
+ /* Re-enable interrupts in MSR */
+ __hard_irq_enable();
+}
+
+/*
+ * System reset hook: dispatch on the wake-up reason encoded in
+ * regs->msr.  Returns 1 when the event was handled here and 0 when the
+ * caller should perform a real system reset.
+ */
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+	int handled = 1;
+
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEEE:
+		do_IRQ(regs);
+		break;
+	case SRR1_WAKEDEC:
+		timer_interrupt(regs);
+		break;
+	case SRR1_WAKEMT:
+		/* the reset-button helper decides whether this is handled */
+		handled = cbe_sysreset_hack();
+		break;
+#ifdef CONFIG_CBE_RAS
+	case SRR1_WAKESYSERR:
+		cbe_system_error_exception(regs);
+		break;
+	case SRR1_WAKETHERM:
+		cbe_thermal_exception(regs);
+		break;
+#endif /* CONFIG_CBE_RAS */
+	default:
+		/* do system reset */
+		handled = 0;
+		break;
+	}
+
+	return handled;
+}
+
+/*
+ * On CPUs with CPU_FTR_PAUSE_ZERO, set the Pause(0) control bit in the
+ * PMCR of every possible cpu that has pervasive registers, then install
+ * the Cell power_save and system_reset_exception hooks.  Does nothing on
+ * CPUs without that feature.
+ */
+void __init cbe_pervasive_init(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cbe_pmd_regs __iomem *pmd = cbe_get_cpu_pmd_regs(cpu);
+
+		if (pmd) {
+			/* Enable Pause(0) control bit */
+			out_be64(&pmd->pmcr,
+				 in_be64(&pmd->pmcr) |
+				 CBE_PMD_PAUSE_ZERO_CONTROL);
+		}
+	}
+
+	ppc_md.power_save = cbe_power_save;
+	ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/pervasive.h b/kernel/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 000000000..fd4d7b709
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,42 @@
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * David J. Erb (djerb@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+/* Implemented in pervasive.c: installs the Cell idle and system-reset hooks */
+extern void cbe_pervasive_init(void);
+/* RAS exception handlers implemented in ras.c */
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+
+/*
+ * cbe_sysreset_hack() is used by the system reset handler for the
+ * SRR1_WAKEMT case; its return value becomes the handler's "handled"
+ * result.  Without reset-button support the stub always returns 1.
+ */
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+extern int cbe_sysreset_hack(void);
+#else
+static inline int cbe_sysreset_hack(void)
+{
+ return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/kernel/arch/powerpc/platforms/cell/pmu.c b/kernel/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 000000000..348a27b12
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,424 @@
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ * David Erb (djerb@us.ibm.com)
+ * Kevin Corry (kevcorry@us.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+/*
+ * Write the 32-bit value x to pmd_regs->reg (placed in the upper half of
+ * the 64-bit MMIO word) and record it in the matching shadow register.
+ * NOTE: uses a variable named `cpu` from the expansion site.
+ */
+#define WRITE_WO_MMIO(reg, x) \
+ do { \
+ u32 _x = (x); \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \
+ shadow_regs->reg = _x; \
+ } while (0)
+
+/*
+ * Load val from the shadow copy of reg; no MMIO access is performed.
+ * NOTE: uses a variable named `cpu` from the expansion site.
+ */
+#define READ_SHADOW_REG(val, reg) \
+ do { \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ (val) = shadow_regs->reg; \
+ } while (0)
+
+/*
+ * Load val from the upper 32 bits of the 64-bit MMIO register reg.
+ * NOTE: uses a variable named `cpu` from the expansion site.
+ */
+#define READ_MMIO_UPPER32(val, reg) \
+ do { \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \
+ } while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+/*
+ * Read physical counter phys_ctr on the given cpu.  Returns 0 when
+ * phys_ctr is out of range.  If a written value is still pending in the
+ * latch, the shadow copy is returned instead of the hardware counter.
+ */
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+	u32 latched, result = 0;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return result;
+
+	READ_SHADOW_REG(latched, counter_value_in_latch);
+
+	/* Read the latch or the actual counter, whichever is newer. */
+	if (latched & (1 << phys_ctr)) {
+		READ_SHADOW_REG(result, pm_ctr[phys_ctr]);
+	} else {
+		READ_MMIO_UPPER32(result, pm_ctr[phys_ctr]);
+	}
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+/*
+ * Write val to physical counter phys_ctr on the given cpu.  Out-of-range
+ * counters are ignored.
+ */
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	/* Writing to a counter only writes to a hardware latch.
+	 * The new value is not propagated to the actual counter
+	 * until the performance monitor is enabled.
+	 */
+	WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	if (!(ctrl & CBE_PM_ENABLE_PERF_MON)) {
+		/* PMU is off: remember that the value is still in the latch */
+		cbe_get_cpu_pmd_shadow_regs(cpu)->counter_value_in_latch |=
+			(1 << phys_ctr);
+		return;
+	}
+
+	/* The counters are already active, so we need to rewrite the
+	 * pm_control register to "re-enable" the PMU.
+	 */
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+/*
+ * Read logical counter ctr.  When the backing physical counter is in
+ * 16-bit mode, logical counters below NR_PHYS_CTRS map to its upper half
+ * and the others to its lower half.
+ */
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+	u32 raw = cbe_read_phys_ctr(cpu, phys_ctr);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) != 16)
+		return raw;
+
+	if (ctr < NR_PHYS_CTRS)
+		return raw >> 16;
+
+	return raw & 0xffff;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+/*
+ * Write logical counter ctr.  In 16-bit mode only the selected half of
+ * the physical counter is replaced; the other half keeps its current
+ * value.
+ */
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16) {
+		u32 current_val = cbe_read_phys_ctr(cpu, phys_ctr);
+
+		val = (ctr < NR_PHYS_CTRS)
+			? ((val << 16) | (current_val & 0xffff))
+			: ((val & 0xffff) | (current_val & 0xffff0000));
+	}
+
+	cbe_write_phys_ctr(cpu, phys_ctr, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+/*
+ * Return the shadow copy of the control register for logical counter
+ * ctr, or 0 if ctr is out of range.
+ */
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+	u32 control = 0;
+
+	if (ctr >= NR_CTRS)
+		return control;
+
+	READ_SHADOW_REG(control, pm07_control[ctr]);
+	return control;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+/*
+ * Write val to the control register of logical counter ctr (MMIO plus
+ * shadow copy).  Out-of-range counters are ignored.
+ */
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+	if (ctr >= NR_CTRS)
+		return;
+
+	WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+/*
+ * Read one of the PMU control registers.  Some registers are served
+ * from the shadow copy, the others are read from MMIO.  Returns 0 for a
+ * value of reg not listed below.
+ */
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+	u32 result = 0;
+
+	switch (reg) {
+	/* registers read back from their shadow copy */
+	case group_control:
+		READ_SHADOW_REG(result, group_control);
+		break;
+	case debug_bus_control:
+		READ_SHADOW_REG(result, debug_bus_control);
+		break;
+	case ext_tr_timer:
+		READ_SHADOW_REG(result, ext_tr_timer);
+		break;
+	case pm_control:
+		READ_SHADOW_REG(result, pm_control);
+		break;
+	case pm_start_stop:
+		READ_SHADOW_REG(result, pm_start_stop);
+		break;
+
+	/* registers read directly from MMIO */
+	case trace_address:
+		READ_MMIO_UPPER32(result, trace_address);
+		break;
+	case pm_status:
+		READ_MMIO_UPPER32(result, pm_status);
+		break;
+	case pm_interval:
+		READ_MMIO_UPPER32(result, pm_interval);
+		break;
+	}
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+/*
+ * Write val to one of the PMU control registers.  Every register goes
+ * through WRITE_WO_MMIO, so the shadow copy is updated as well.  A value
+ * of reg not listed below is ignored.
+ */
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+	switch (reg) {
+	case group_control:
+		WRITE_WO_MMIO(group_control, val);
+		break;
+	case debug_bus_control:
+		WRITE_WO_MMIO(debug_bus_control, val);
+		break;
+	case trace_address:
+		WRITE_WO_MMIO(trace_address, val);
+		break;
+	case ext_tr_timer:
+		WRITE_WO_MMIO(ext_tr_timer, val);
+		break;
+	case pm_status:
+		WRITE_WO_MMIO(pm_status, val);
+		break;
+	case pm_control:
+		WRITE_WO_MMIO(pm_control, val);
+		break;
+	case pm_interval:
+		WRITE_WO_MMIO(pm_interval, val);
+		break;
+	case pm_start_stop:
+		WRITE_WO_MMIO(pm_start_stop, val);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+/*
+ * Return the width (16 or 32) of physical counter phys_ctr as recorded
+ * in pm_control, or 0 if phys_ctr is out of range.
+ */
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return 0;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	if (ctrl & CBE_PM_16BIT_CTR(phys_ctr))
+		return 16;
+
+	return 32;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+/*
+ * Switch physical counter phys_ctr to 16-bit or 32-bit mode.  Any other
+ * ctr_size leaves the mode bit untouched, but pm_control is still
+ * rewritten.  Out-of-range counters are ignored.
+ */
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+	u32 ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+
+	if (ctr_size == 16)
+		ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+	else if (ctr_size == 32)
+		ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+/*
+ * Enable the performance monitor on the given cpu.  The "value still in
+ * latch" bookkeeping is cleared first, since enabling commits pending
+ * counter writes.
+ */
+void cbe_enable_pm(u32 cpu)
+{
+	u32 ctrl;
+
+	cbe_get_cpu_pmd_shadow_regs(cpu)->counter_value_in_latch = 0;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	ctrl |= CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+/* Disable the performance monitor by clearing its enable bit in pm_control. */
+void cbe_disable_pm(u32 cpu)
+{
+	u32 ctrl = cbe_read_pm(cpu, pm_control);
+
+	ctrl &= ~CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+/*
+ * Copy one 128-bit trace buffer entry into buf[0] and buf[1].  The low
+ * half must be read first: reading the second half advances
+ * trace_address.
+ */
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+	struct cbe_pmd_regs __iomem *pmd = cbe_get_cpu_pmd_regs(cpu);
+
+	buf[0] = in_be64(&pmd->trace_buffer_0_63);
+	buf[1] = in_be64(&pmd->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+/* Return the current pm_status value for the given cpu. */
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+	u32 status;
+
+	/* Reading pm_status clears the interrupt bits. */
+	status = cbe_read_pm(cpu, pm_status);
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+/*
+ * Route PMU interrupts to the given cpu/thread and, if mask is non-zero,
+ * write it to pm_status to enable those interrupt bits.
+ */
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+	/* Set which node and thread will handle the next interrupt. */
+	iic_set_interrupt_routing(cpu, thread, 0);
+
+	if (!mask)
+		return;
+
+	/* Enable the interrupt bits in the pm_status register. */
+	cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+/* Clear any pending PMU interrupt bits, then disable all of them. */
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+	/* the returned status is not needed; the read is the point */
+	(void)cbe_get_and_clear_pm_interrupts(cpu);
+
+	cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+/* PMU interrupt handler: forward the interrupted register state to perf_irq(). */
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	perf_irq(regs);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Arch initcall: map and request the PMU interrupt (IIC_IRQ_IOEX_PMI) on
+ * every online node.
+ *
+ * Returns 0 on success, -EINVAL if a mapping cannot be created, or the
+ * error from request_irq().  The first failure stops the loop; interrupts
+ * already requested for earlier nodes stay registered.
+ */
+static int __init cbe_init_pm_irq(void)
+{
+	unsigned int irq;
+	int rc, node;
+
+	for_each_online_node(node) {
+		irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+					       (node << IIC_IRQ_NODE_SHIFT));
+		if (irq == NO_IRQ) {
+			/* explicit level: these are errors, not default-level chatter */
+			printk(KERN_ERR "ERROR: Unable to allocate irq for node %d\n",
+			       node);
+			return -EINVAL;
+		}
+
+		rc = request_irq(irq, cbe_pm_irq,
+				 0, "cbe-pmu-0", NULL);
+		if (rc) {
+			printk(KERN_ERR "ERROR: Request for irq on node %d failed\n",
+			       node);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+/*
+ * Look up the existing PMU interrupt mapping for the given node and wait
+ * in synchronize_irq() on it.  Logs a warning and returns if no mapping
+ * exists.
+ */
+void cbe_sync_irq(int node)
+{
+	unsigned int virq;
+
+	virq = irq_find_mapping(NULL, IIC_IRQ_IOEX_PMI |
+				      (node << IIC_IRQ_NODE_SHIFT));
+	if (virq != NO_IRQ) {
+		synchronize_irq(virq);
+		return;
+	}
+
+	printk(KERN_WARNING
+	       "ERROR, unable to get existing irq %d for node %d\n",
+	       virq, node);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/kernel/arch/powerpc/platforms/cell/qpace_setup.c b/kernel/arch/powerpc/platforms/cell/qpace_setup.c
new file mode 100644
index 000000000..d328140dc
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/qpace_setup.c
@@ -0,0 +1,148 @@
+/*
+ * linux/arch/powerpc/platforms/cell/qpace_setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/console.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/kexec.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/cputable.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+/*
+ * Emit the "machine" line using the "model" property of the device-tree
+ * root.  An empty model is printed when the root node or the property is
+ * missing; of_get_property() returns NULL in the latter case, so the
+ * fallback has to be applied after the lookup.
+ */
+static void qpace_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+
+	/* print before dropping the node reference that owns the property */
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+}
+
+/* Boot progress hook: print the hex code and the message (empty when s is NULL). */
+static void qpace_progress(char *s, unsigned short hex)
+{
+	const char *msg = s ? s : "";
+
+	printk("*** %04x : %s\n", hex, msg);
+}
+
+/*
+ * Match table passed to of_platform_bus_probe() by
+ * qpace_publish_devices(); the empty entry terminates the list.
+ */
+static const struct of_device_id qpace_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .type = "spider", },
+ { .type = "axon", },
+ { .type = "plb5", },
+ { .type = "plb4", },
+ { .type = "opb", },
+ { .type = "ebc", },
+ {},
+};
+
+/*
+ * Subsys initcall: publish OF platform devices for the buses listed in
+ * qpace_bus_ids and register one "cbe-mic" platform device per online
+ * node that has MIC registers.  Always returns 0.
+ */
+static int __init qpace_publish_devices(void)
+{
+	int nid;
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, qpace_bus_ids, NULL);
+
+	/* There is no device for the MIC memory controller, thus we create
+	 * a platform device for it to attach the EDAC driver to.
+	 */
+	for_each_online_node(nid) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(nid)) != NULL)
+			platform_device_register_simple("cbe-mic", nid,
+							NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(qpace, qpace_publish_devices);
+
+/*
+ * setup_arch hook for the QPACE machine: selects the MMIO SPU ops (when
+ * CONFIG_SPU_BASE), initialises the CBE register maps, RAS and SMP
+ * support (when configured), seeds loops_per_jiffy, sets up the
+ * pervasive unit and picks the dummy console if available.
+ */
+static void __init qpace_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+}
+
+/*
+ * Machine probe: returns 1 (after selecting native HPTE handling and the
+ * RTAS power-off hook) when the flat device tree root is compatible with
+ * "IBM,QPACE", 0 otherwise.
+ */
+static int __init qpace_probe(void)
+{
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(dt_root, "IBM,QPACE")) {
+		hpte_init_native();
+		pm_power_off = rtas_power_off;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Machine description for QPACE: probe/setup hooks defined above, RTAS
+ * for restart/halt/RTC, and the IIC as the only interrupt controller.
+ */
+define_machine(qpace) {
+ .name = "QPACE",
+ .probe = qpace_probe,
+ .setup_arch = qpace_setup_arch,
+ .show_cpuinfo = qpace_show_cpuinfo,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = qpace_progress,
+ .init_IRQ = iic_init_IRQ,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/ras.c b/kernel/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 000000000..e865d7481
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+
+
+/*
+ * Print the global Fault Isolation Registers of the given cpu, followed
+ * by the IOC FIR when the IIC registers are available.  Prints nothing if
+ * the cpu has no pervasive register mapping.
+ */
+static void dump_fir(int cpu)
+{
+	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+	if (pregs == NULL)
+		return;
+
+	/* Todo: do some nicer parsing of bits and based on them go down
+	 * to other sub-units FIRs and not only IIC
+	 */
+	printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n",
+	       in_be64(&pregs->checkstop_fir));
+	/* read the recoverable FIR here, not the checkstop FIR a second time */
+	printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n",
+	       in_be64(&pregs->recoverable_fir));
+	printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+	       in_be64(&pregs->spec_att_mchk_fir));
+
+	if (iregs == NULL)
+		return;
+	printk(KERN_ERR "IOC FIR : 0x%016llx\n",
+	       in_be64(&iregs->ioc_fir));
+
+}
+
+void cbe_system_error_exception(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+ dump_stack();
+}
+
+/*
+ * Maintenance interrupt: nothing is implemented for it at this point, so
+ * only report it and dump the stack.
+ */
+void cbe_maintenance_exception(struct pt_regs *regs)
+{
+	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n",
+	       smp_processor_id());
+	dump_stack();
+}
+
+/*
+ * Thermal interrupt: nothing is implemented for it at this point, so
+ * only report it and dump the stack.
+ */
+void cbe_thermal_exception(struct pt_regs *regs)
+{
+	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n",
+	       smp_processor_id());
+	dump_stack();
+}
+
+/*
+ * Machine check hook: log the event and dump the FIRs of this cpu.
+ * Always returns 0.
+ */
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+	int this_cpu = smp_processor_id();
+
+	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", this_cpu);
+	dump_fir(this_cpu);
+
+	/* No recovery from this code now, lets continue */
+	return 0;
+}
+
+/* One PTCAL buffer that has been handed to firmware for a node. */
+struct ptcal_area {
+ struct list_head list; /* link in ptcal_list */
+ int nid; /* node id passed to the RTAS start/stop calls */
+ int order; /* allocation order of pages */
+ struct page *pages; /* pages backing the PTCAL area */
+};
+
+/* All areas for which the RTAS start call succeeded */
+static LIST_HEAD(ptcal_list);
+
+/* RTAS tokens for "ibm,cbe-start-ptcal" / "ibm,cbe-stop-ptcal" */
+static int ptcal_start_tok, ptcal_stop_tok;
+
+/*
+ * Allocate a PTCAL area of 2^order pages on node nid and hand it to
+ * firmware through the RTAS start call.
+ *
+ * In a kdump kernel the RTAS stop call is issued for the node first.
+ * The address passed to firmware is the physical address of the
+ * allocation plus half a page, split into two 32-bit arguments.
+ *
+ * Returns 0 on success (the area is then linked into ptcal_list),
+ * -ENOMEM if either allocation fails, or -EIO if the RTAS start call
+ * fails; on failure everything allocated here is freed again.
+ */
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+ struct ptcal_area *area;
+ int ret = -ENOMEM;
+ unsigned long addr;
+
+ if (is_kdump_kernel())
+ rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+ area = kmalloc(sizeof(*area), GFP_KERNEL);
+ if (!area)
+ goto out_err;
+
+ area->nid = nid;
+ area->order = order;
+ area->pages = alloc_pages_exact_node(area->nid,
+ GFP_KERNEL|__GFP_THISNODE,
+ area->order);
+
+ if (!area->pages) {
+ printk(KERN_WARNING "%s: no page on node %d\n",
+ __func__, area->nid);
+ goto out_free_area;
+ }
+
+ /*
+ * We move the ptcal area to the middle of the allocated
+ * page, in order to avoid prefetches in memcpy and similar
+ * functions stepping on it.
+ */
+ addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
+ printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
+ __func__, area->nid, addr);
+
+ ret = -EIO;
+ if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+ (unsigned int)(addr >> 32),
+ (unsigned int)(addr & 0xffffffff))) {
+ printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+ __func__, nid);
+ goto out_free_pages;
+ }
+
+ list_add(&area->list, &ptcal_list);
+
+ return 0;
+
+out_free_pages:
+ __free_pages(area->pages, area->order);
+out_free_area:
+ kfree(area);
+out_err:
+ return ret;
+}
+
+/*
+ * Enable PTCAL on every node found in the device tree.
+ *
+ * The area size comes from the "ibm,cbe-ptcal-size" property of /rtas.
+ * Nodes are taken from "mic-tm" device nodes if any exist, otherwise
+ * from the "node-id" property of "cpu" nodes.  The result of
+ * cbe_ptcal_enable_on_node() is not checked, so a per-node failure does
+ * not change the return value.
+ *
+ * Returns 0 if at least one node was attempted, -ENODEV if /rtas, the
+ * size property or any usable node is missing.
+ */
+static int __init cbe_ptcal_enable(void)
+{
+ const u32 *size;
+ struct device_node *np;
+ int order, found_mic = 0;
+
+ np = of_find_node_by_path("/rtas");
+ if (!np)
+ return -ENODEV;
+
+ size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+ if (!size) {
+ of_node_put(np);
+ return -ENODEV;
+ }
+
+ pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+ order = get_order(*size);
+ of_node_put(np);
+
+ /* support for malta device trees, with be@/mic@ nodes */
+ for_each_node_by_type(np, "mic-tm") {
+ cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+ found_mic = 1;
+ }
+
+ if (found_mic)
+ return 0;
+
+ /* support for older device tree - use cpu nodes */
+ for_each_node_by_type(np, "cpu") {
+ const u32 *nid = of_get_property(np, "node-id", NULL);
+ if (!nid) {
+ printk(KERN_ERR "%s: node %s is missing node-id?\n",
+ __func__, np->full_name);
+ continue;
+ }
+ cbe_ptcal_enable_on_node(*nid, order);
+ found_mic = 1;
+ }
+
+ return found_mic ? 0 : -ENODEV;
+}
+
+/*
+ * Stop PTCAL on every area in ptcal_list and release the memory.
+ *
+ * If the RTAS stop call fails for an area, that area stays on the list
+ * and its pages are not freed.  Returns 0 if every area was released,
+ * -EIO if at least one stop call failed.
+ */
+static int cbe_ptcal_disable(void)
+{
+ struct ptcal_area *area, *tmp;
+ int ret = 0;
+
+ pr_debug("%s: disabling PTCAL\n", __func__);
+
+ list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+ /* disable ptcal on this node */
+ if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+ printk(KERN_ERR "%s: error disabling PTCAL "
+ "on node %d!\n", __func__,
+ area->nid);
+ ret = -EIO;
+ continue;
+ }
+
+ /* ensure we can access the PTCAL area */
+ memset(page_address(area->pages), 0,
+ 1 << (area->order + PAGE_SHIFT));
+
+ /* clean up */
+ list_del(&area->list);
+ __free_pages(area->pages, area->order);
+ kfree(area);
+ }
+
+ return ret;
+}
+
+/*
+ * Reboot notifier: disable PTCAL before the machine goes down.
+ *
+ * Always returns NOTIFY_DONE.  A raw negative errno must not be returned
+ * from a notifier callback: it has the NOTIFY_STOP_MASK bit set and would
+ * stop the remaining reboot notifiers from running.  Failures are already
+ * logged by cbe_ptcal_disable().
+ */
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+	unsigned long code, void *data)
+{
+	cbe_ptcal_disable();
+
+	return NOTIFY_DONE;
+}
+
+/* Crash shutdown hook: disable PTCAL; the result is ignored. */
+static void cbe_ptcal_crash_shutdown(void)
+{
+ cbe_ptcal_disable();
+}
+
+/* Registered with register_reboot_notifier() by cbe_ptcal_init() */
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+ .notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+static int sysreset_hack;
+
+/*
+ * Device initcall: on machines compatible with "IBM,CBPLUS-1.0", record
+ * that the system-reset hack is needed and set the M8 bit in the FIR mode
+ * register of cpu 0.  Always returns 0.
+ */
+static int __init cbe_sysreset_init(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+
+	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
+	if (sysreset_hack) {
+		pmd = cbe_get_cpu_pmd_regs(0);
+		if (pmd) {
+			/* Enable JTAG system-reset hack */
+			out_be32(&pmd->fir_mode_reg,
+				 in_be32(&pmd->fir_mode_reg) |
+				 CBE_PMD_FIR_MODE_M8);
+		}
+	}
+
+	return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+/*
+ * Returns 1 unless the hack is active, we run on cpu 0, and either the
+ * pervasive registers are unavailable or the low 16 bits of ras_esc_0
+ * are set.  In the latter case the register is cleared before returning
+ * 0.
+ */
+int cbe_sysreset_hack(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+
+	/*
+	 * The BMC can inject user triggered system reset exceptions,
+	 * but cannot set the system reset reason in srr1,
+	 * so check an extra register here.
+	 */
+	if (!sysreset_hack || smp_processor_id() != 0)
+		return 1;
+
+	pmd = cbe_get_cpu_pmd_regs(0);
+	if (!pmd)
+		return 0;
+
+	if (!(in_be64(&pmd->ras_esc_0) & 0x0000ffff))
+		return 1;
+
+	out_be64(&pmd->ras_esc_0, 0);
+	return 0;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+/*
+ * Arch initcall: look up the PTCAL RTAS tokens, register the reboot and
+ * crash-shutdown hooks that disable PTCAL, and only then enable it.
+ *
+ * Returns -ENODEV if either RTAS service is unknown, the registration
+ * error if a hook cannot be installed, otherwise the result of
+ * cbe_ptcal_enable().
+ */
+int __init cbe_ptcal_init(void)
+{
+	int ret;
+
+	ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal");
+	ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal");
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE ||
+	    ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (!ret) {
+		ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+		if (!ret)
+			return cbe_ptcal_enable();
+
+		/* undo the first registration when the second one fails */
+		unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	}
+
+	printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+	return ret;
+}
+
+arch_initcall(cbe_ptcal_init);
+
+/*
+ * Set the thermal and system-error interrupt/wakeup enable bits in HID0
+ * and install cbe_machine_check_handler() as the platform machine check
+ * hook.
+ */
+void __init cbe_ras_init(void)
+{
+ unsigned long hid0;
+
+ /*
+ * Enable System Error & thermal interrupts and wakeup conditions
+ */
+
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP |
+ HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+ mtspr(SPRN_HID0, hid0);
+ mb();
+
+ /*
+ * Install machine check handler. Leave setting of precise mode to
+ * what the firmware did for now
+ */
+ ppc_md.machine_check_exception = cbe_machine_check_handler;
+ mb();
+
+ /*
+ * For now, we assume that IOC_FIR is already set to forward some
+ * error conditions to the System Error handler. If that is not true
+ * then it will have to be fixed up here.
+ */
+}
diff --git a/kernel/arch/powerpc/platforms/cell/ras.h b/kernel/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 000000000..eb7ee54c8
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,9 @@
+#ifndef RAS_H
+#define RAS_H
+
+extern void cbe_system_error_exception(struct pt_regs *regs);
+extern void cbe_maintenance_exception(struct pt_regs *regs);
+extern void cbe_thermal_exception(struct pt_regs *regs);
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/kernel/arch/powerpc/platforms/cell/setup.c b/kernel/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 000000000..36cff28d0
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,286 @@
+/*
+ * linux/arch/powerpc/platforms/cell/setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
+
+#include "cell.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+/*
+ * Debug trace helper: forwards to udbg_printf() when DEBUG is defined and
+ * expands to nothing otherwise (this file #undefs DEBUG near the top).
+ */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Emit the "machine" line using the "model" property of the device-tree
+ * root.  An empty model is printed when the root node or the property is
+ * missing; of_get_property() returns NULL in the latter case, so the
+ * fallback has to be applied after the lookup.
+ */
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+
+	/* print before dropping the node reference that owns the property */
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+}
+
+/* Boot progress hook: print the hex code and the message (empty when s is NULL). */
+static void cell_progress(char *s, unsigned short hex)
+{
+	const char *msg = s ? s : "";
+
+	printk("*** %04x : %s\n", hex, msg);
+}
+
+/*
+ * PCI header fixup: on the Cell machine, clear all bridge resources of
+ * the device at devfn 0 directly below an Axon PCIe host bridge, so the
+ * root complex's own resources are hidden.
+ */
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+	const char *model;
+	int idx;
+
+	if (!machine_is(cell))
+		return;
+
+	/* We're searching for a direct child of the PHB */
+	if (dev->bus->self != NULL || dev->devfn != 0)
+		return;
+
+	hose = pci_bus_to_host(dev->bus);
+	if (!hose)
+		return;
+
+	/* Only on PCIE */
+	if (!of_device_is_compatible(hose->dn, "pciex"))
+		return;
+
+	/* And only on axon */
+	model = of_get_property(hose->dn, "model", NULL);
+	if (model == NULL || strcmp(model, "Axon") != 0)
+		return;
+
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *res = &dev->resource[idx];
+
+		res->end = 0;
+		res->start = 0;
+		res->flags = 0;
+	}
+
+	printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+/*
+ * pci_setup_phb hook: run the RTAS PHB setup, install the Cell controller
+ * ops and, for a "pci" node whose model is "Spider", register the Spider
+ * I/O workarounds.  Returns the RTAS setup error, or 0.
+ */
+static int cell_setup_phb(struct pci_controller *phb)
+{
+	struct device_node *node;
+	const char *model;
+	int rc;
+
+	rc = rtas_setup_phb(phb);
+	if (rc)
+		return rc;
+
+	phb->controller_ops = cell_pci_controller_ops;
+
+	node = phb->dn;
+	model = of_get_property(node, "model", NULL);
+
+	/* Setup workarounds for spider */
+	if (model != NULL && strcmp(node->name, "pci") == 0 &&
+	    strcmp(model, "Spider") == 0)
+		iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+
+	return 0;
+}
+
+/*
+ * Match table passed to of_platform_bus_probe() by
+ * cell_publish_devices(); the empty entry terminates the list.
+ */
+static const struct of_device_id cell_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .type = "spider", },
+ { .type = "axon", },
+ { .type = "plb5", },
+ { .type = "plb4", },
+ { .type = "opb", },
+ { .type = "ebc", },
+ {},
+};
+
+/*
+ * Subsys initcall: publish OF platform devices for the buses listed in
+ * cell_bus_ids, create platform devices for toplevel "pci"/"pciex" host
+ * bridges, and register one "cbe-mic" platform device per online node
+ * that has MIC registers.  Always returns 0.
+ */
+static int __init cell_publish_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *np;
+	int node;
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, cell_bus_ids, NULL);
+
+	/* On spider based blades, we need to manually create the OF
+	 * platform devices for the PCI host bridges
+	 */
+	for_each_child_of_node(root, np) {
+		if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
+					 strcmp(np->type, "pciex") != 0))
+			continue;
+		of_platform_device_create(np, NULL, NULL);
+	}
+
+	/* drop the reference taken by of_find_node_by_path() */
+	of_node_put(root);
+
+	/* There is no device for the MIC memory controller, thus we create
+	 * a platform device for it to attach the EDAC driver to.
+	 */
+	for_each_online_node(node) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
+			continue;
+		platform_device_register_simple("cbe-mic", node, NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+/*
+ * Allocate and initialise an MPIC for every "interrupt-controller" node
+ * that is compatible with "CBEA,platform-open-pic".
+ */
+static void __init mpic_init_IRQ(void)
+{
+	struct device_node *node = NULL;
+	struct mpic *pic;
+
+	while ((node = of_find_node_by_name(node, "interrupt-controller"))) {
+		if (!of_device_is_compatible(node, "CBEA,platform-open-pic"))
+			continue;
+
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		pic = mpic_alloc(node, 0, MPIC_SECONDARY | MPIC_NO_RESET,
+				 0, 0, " MPIC ");
+		if (pic != NULL)
+			mpic_init(pic);
+	}
+}
+
+
+/* init_IRQ hook: bring up the IIC, then the Spider PIC and any MPICs. */
+static void __init cell_init_irq(void)
+{
+ iic_init_IRQ();
+ spider_init_IRQ();
+ mpic_init_IRQ();
+}
+
+/* Program DABRX with both the kernel and user match bits. */
+static void __init cell_set_dabrx(void)
+{
+ mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+/*
+ * setup_arch hook for the Cell machine: selects the MMIO SPU ops (when
+ * CONFIG_SPU_BASE), initialises the CBE register maps, DABRX, RAS and
+ * SMP support (when configured), seeds loops_per_jiffy, sets up the PCI
+ * config tokens and the pervasive unit, picks the dummy console if
+ * available and initialises the MMIO NVRAM.
+ */
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+ cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ /* Find and initialize PCI host bridges */
+ init_pci_config_tokens();
+
+ cbe_pervasive_init();
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+
+ mmio_nvram_init();
+}
+
+/*
+ * Machine probe: returns 1 (after selecting native HPTE handling and the
+ * RTAS power-off hook) when the flat device tree root is compatible with
+ * "IBM,CBEA" or "IBM,CPBW-1.0", 0 otherwise.
+ */
+static int __init cell_probe(void)
+{
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(dt_root, "IBM,CBEA") ||
+	    of_flat_dt_is_compatible(dt_root, "IBM,CPBW-1.0")) {
+		hpte_init_native();
+		pm_power_off = rtas_power_off;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Machine description for Cell: probe/setup hooks defined above, RTAS
+ * for restart/halt/RTC, and cell_init_irq() for interrupt controllers.
+ */
+define_machine(cell) {
+ .name = "Cell",
+ .probe = cell_probe,
+ .setup_arch = cell_setup_arch,
+ .show_cpuinfo = cell_show_cpuinfo,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .calibrate_decr = generic_calibrate_decr,
+ .progress = cell_progress,
+ .init_IRQ = cell_init_irq,
+ .pci_setup_phb = cell_setup_phb,
+};
+
+/* Copied into each PHB by cell_setup_phb() */
+struct pci_controller_ops cell_pci_controller_ops;
diff --git a/kernel/arch/powerpc/platforms/cell/smp.c b/kernel/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 000000000..895560f4b
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,168 @@
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+#include <asm/code-patching.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+/*
+ * DBG() forwards to udbg_printf() when DEBUG is defined and expands to
+ * nothing otherwise.  DEBUG is #undef'ed near the top of this file.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+/**
+ * smp_startup_cpu() - make sure the given logical cpu has been started
+ * @lcpu: logical cpu number
+ *
+ * Cpus recorded in of_spin_map were already started from Open Firmware and
+ * need nothing more.  For any other cpu the RTAS "start-cpu" service is
+ * called with the physical address of generic_secondary_smp_init; if RTAS
+ * does not provide that service the cpu is presumed to be spinning already.
+ *
+ * Returns:
+ *	0 - failure (the RTAS call returned a non-zero status)
+ *	1 - success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	unsigned long entry_pa =
+		__pa(ppc_function_entry(generic_secondary_smp_init));
+	unsigned int hw_id;
+	int token;
+	int rc;
+
+	/* Already started by OF and sitting in spin loop */
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
+		return 1;
+
+	hw_id = get_hard_smp_processor_id(lcpu);
+
+	/* Fixup atomic count: it exited inside IRQ handler. */
+	task_thread_info(paca[lcpu].__current)->preempt_count = 0;
+
+	/*
+	 * If the RTAS start-cpu token does not exist then presume the
+	 * cpu is already spinning.
+	 */
+	token = rtas_token("start-cpu");
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	rc = rtas_call(token, 3, 1, NULL, hw_id, entry_pa, lcpu);
+	if (rc == 0)
+		return 1;
+
+	printk(KERN_ERR "start-cpu failed: %i\n", rc);
+	return 0;
+}
+
+/*
+ * setup_cpu hook of bpa_iic_smp_ops.
+ *
+ * Calls iic_setup_cpu() for every cpu except boot_cpuid, then programs
+ * DABRX on the calling cpu.
+ */
+static void smp_cell_setup_cpu(int cpu)
+{
+ if (cpu != boot_cpuid)
+ iic_setup_cpu();
+
+ /*
+ * change default DABRX to allow user watchpoints
+ */
+ mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
+}
+
+/*
+ * kick_cpu hook of bpa_iic_smp_ops.
+ *
+ * Starts cpu @nr via smp_startup_cpu() and then releases it by setting
+ * paca[nr].cpu_start.  Returns 0 on success, or -ENOENT when
+ * smp_startup_cpu() reports failure.  An out-of-range @nr is a BUG.
+ */
+static int smp_cell_kick_cpu(int nr)
+{
+ BUG_ON(nr < 0 || nr >= NR_CPUS);
+
+ if (!smp_startup_cpu(nr))
+ return -ENOENT;
+
+ /*
+ * The processor is currently spinning, waiting for the
+ * cpu_start field to become non-zero. After we set cpu_start,
+ * the processor will continue on to secondary_start.
+ */
+ paca[nr].cpu_start = 1;
+
+ return 0;
+}
+
+/*
+ * SMP operations installed by smp_init_cell().  The give_timebase and
+ * take_timebase members are not set here; smp_init_cell() fills them in
+ * when RTAS provides "freeze-time-base".
+ */
+static struct smp_ops_t bpa_iic_smp_ops = {
+ .message_pass = iic_message_pass,
+ .probe = iic_request_IPIs,
+ .kick_cpu = smp_cell_kick_cpu,
+ .setup_cpu = smp_cell_setup_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+};
+
+/*
+ * Install the Cell SMP operations and work out which cpus are still
+ * spinning in the Open Firmware hold loop.  This is called very early.
+ */
+void __init smp_init_cell(void)
+{
+	int cpu;
+
+	DBG(" -> smp_init_cell()\n");
+
+	smp_ops = &bpa_iic_smp_ops;
+
+	/* Mark threads which are still spinning in hold loops. */
+	if (!cpu_has_feature(CPU_FTR_SMT)) {
+		cpumask_copy(&of_spin_map, cpu_present_mask);
+	} else {
+		/* with SMT only thread 0 of each core is recorded */
+		for_each_present_cpu(cpu) {
+			if (cpu_thread_in_core(cpu) != 0)
+				continue;
+			cpumask_set_cpu(cpu, &of_spin_map);
+		}
+	}
+
+	/* the boot cpu is never in the hold loop */
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
+
+	/* Non-lpar has additional take/give timebase */
+	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
+		smp_ops->give_timebase = rtas_give_timebase;
+		smp_ops->take_timebase = rtas_take_timebase;
+	}
+
+	DBG(" <- smp_init_cell()\n");
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spider-pci.c b/kernel/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 000000000..f1f787889
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/io-workarounds.h>
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+/* Per-bus private data, stored in iowa_bus->private by spiderpci_iowa_init(). */
+struct spiderpci_iowa_private {
+ /* ioremap()ed spider PCI register window */
+ void __iomem *regs;
+};
+
+/*
+ * Issue a read of the SPIDER_PCI_DUMMY_READ register of the bridge behind
+ * @bus, followed by iosync().  The value read is not used.
+ */
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *priv = bus->private;
+
+	(void)in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+/*
+ * Generators for the MMIO read accessors placed in spiderpci_ops.
+ *
+ * SPIDER_PCI_MMIO_READ(name, ret) defines spiderpci_<name>(), which
+ * performs __do_<name>() on the address, then calls spiderpci_io_flush()
+ * on the bus found by iowa_mem_find_bus() and returns the value read.
+ *
+ * SPIDER_PCI_MMIO_READ_STR(name) does the same for the string variants
+ * that fill a buffer and return nothing.
+ */
+#define SPIDER_PCI_MMIO_READ(name, ret) \
+static ret spiderpci_##name(const PCI_IO_ADDR addr) \
+{ \
+ ret val = __do_##name(addr); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+ return val; \
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name) \
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \
+ unsigned long count) \
+{ \
+ __do_##name(addr, buf, count); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+}
+
+/* Instantiate one accessor per read width / flavour */
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+/*
+ * memcpy_fromio accessor for spiderpci_ops: copies @n bytes from @src to
+ * @dest with __do_memcpy_fromio(), then flushes the bus that owns @src.
+ */
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+ unsigned long n)
+{
+ __do_memcpy_fromio(dest, src, n);
+ spiderpci_io_flush(iowa_mem_find_bus(src));
+}
+
+/*
+ * One-time chip setup for a spider PCI bridge.
+ *
+ * @phb:  host bridge; phb->parent is the device used for the DMA mapping
+ * @regs: mapped spider PCI register window
+ *
+ * When SPIDER_PCI_DISABLE_PREFETCH is defined, bit 0x8 is set in the
+ * SPIDER_PCI_VCI_CNTL_STAT register.  A page is then allocated, DMA
+ * mapped, and its bus address is written to SPIDER_PCI_DUMMY_READ_BASE.
+ * On success the page and its mapping are kept; nothing in this function
+ * releases them.
+ *
+ * Returns 0 on success, -1 if the allocation or the mapping fails.
+ */
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+					   void __iomem *regs)
+{
+	void *dummy_page_va;
+	dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+	u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+	pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+	out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+	/* setup dummy read */
+	/*
+	 * On CellBlade we cannot tell which XDR memory kmalloc() will use
+	 * for dummy_page_va.  For best performance the page should come
+	 * from the XDR nearest to the spider-pci, which would mean picking
+	 * the CBE nearest to the spider-pci and allocating from its memory,
+	 * but it is not known how to do that here.
+	 *
+	 * Celleb does not have this problem, because it has only one XDR.
+	 */
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(phb->parent, dummy_page_da)) {
+		pr_err("SPIDER-IOWA:Map dummy page failed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+	return 0;
+}
+
+/*
+ * IO-workaround init callback for a spider PCI bus.
+ *
+ * @bus:  bus being set up; bus->phb->dn is the bridge's device node
+ * @data: byte offset, passed as a pointer-sized integer, added to the
+ *        start of the node's first address resource to locate the
+ *        register window
+ *
+ * Allocates the private data, maps SPIDER_PCI_REG_SIZE bytes of registers,
+ * publishes both through bus->private and runs the chip setup.
+ *
+ * Returns 0 on success.  On any failure everything acquired here is
+ * released, bus->private is left NULL and -1 is returned.
+ */
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
+		 np->full_name);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		/* terminate the line so the next log message is not glued on */
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private\n");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);
+	bus->private = NULL;
+
+	/* regs is still NULL when we failed before the ioremap() */
+	if (regs)
+		iounmap(regs);
+
+	return -1;
+}
+
+/*
+ * PCI I/O accessor table for spider.  Only read-side accessors and
+ * memcpy_fromio are set here; each one is a spiderpci_* wrapper defined
+ * above that flushes the bus after the access.
+ */
+struct ppc_pci_io spiderpci_ops = {
+ .readb = spiderpci_readb,
+ .readw = spiderpci_readw,
+ .readl = spiderpci_readl,
+ .readq = spiderpci_readq,
+ .readw_be = spiderpci_readw_be,
+ .readl_be = spiderpci_readl_be,
+ .readq_be = spiderpci_readq_be,
+ .readsb = spiderpci_readsb,
+ .readsw = spiderpci_readsw,
+ .readsl = spiderpci_readsl,
+ .memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/kernel/arch/powerpc/platforms/cell/spider-pic.c b/kernel/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 000000000..1f72f4ab6
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,359 @@
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+
+#include <asm/pgtable.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/*
+ * register layout taken from Spider spec, table 7.4-4
+ *
+ * The values are byte offsets that the code in this file adds to the
+ * mapped register base (pic->regs).
+ */
+enum {
+ TIR_DEN = 0x004, /* Detection Enable Register */
+ TIR_MSK = 0x084, /* Mask Level Register */
+ TIR_EDC = 0x0c0, /* Edge Detection Clear Register */
+ TIR_PNDA = 0x100, /* Pending Register A */
+ TIR_PNDB = 0x104, /* Pending Register B */
+ TIR_CS = 0x144, /* Current Status Register */
+ TIR_LCSA = 0x150, /* Level Current Status Register A */
+ TIR_LCSB = 0x154, /* Level Current Status Register B */
+ TIR_LCSC = 0x158, /* Level Current Status Register C */
+ TIR_LCSD = 0x15c, /* Level Current Status Register D */
+ TIR_CFGA = 0x200, /* Setting Register A0 */
+ TIR_CFGB = 0x204, /* Setting Register B0 */
+ /* 0x208 ... 0x3ff Setting Register An/Bn */
+ TIR_PPNDA = 0x400, /* Packet Pending Register A */
+ TIR_PPNDB = 0x404, /* Packet Pending Register B */
+ TIR_PIERA = 0x408, /* Packet Output Error Register A */
+ TIR_PIERB = 0x40c, /* Packet Output Error Register B */
+ TIR_PIEN = 0x444, /* Packet Output Enable Register */
+ TIR_PIPND = 0x454, /* Packet Output Pending Register */
+ TIRDID = 0x484, /* Spider Device ID Register */
+ REISTIM = 0x500, /* Reissue Command Timeout Time Setting */
+ REISTIMEN = 0x504, /* Reissue Command Timeout Setting */
+ REISWAITEN = 0x508, /* Reissue Wait Control */
+};
+
+/* Number of entries in spider_pics[] */
+#define SPIDER_CHIP_COUNT 4
+/* Interrupt sources per chip; also the size of each linear irq domain */
+#define SPIDER_SRC_COUNT 64
+/* Value of the top byte of TIR_CS that the cascade handler treats as "no interrupt" */
+#define SPIDER_IRQ_INVALID 63
+
+/* State of one spider interrupt controller */
+struct spider_pic {
+ /* linear irq domain covering this chip's sources */
+ struct irq_domain *host;
+ /* ioremap()ed register window */
+ void __iomem *regs;
+ /* node number, written into each source's configuration register */
+ unsigned int node_id;
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+/* Return the spider_pic stored as chip data of @d (set in spider_host_map()). */
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
+{
+ return irq_data_get_irq_chip_data(d);
+}
+
+/*
+ * Return the address of the first configuration register of source @src.
+ * Sources are spaced 8 bytes apart starting at TIR_CFGA; the second
+ * register of a source is at the returned address + 4.
+ */
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+ unsigned int src)
+{
+ return pic->regs + TIR_CFGA + 8 * src;
+}
+
+/*
+ * irq_unmask: read-modify-write the source's configuration register,
+ * setting the 0x30000000 bits.
+ */
+static void spider_unmask_irq(struct irq_data *d)
+{
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+ out_be32(cfg, in_be32(cfg) | 0x30000000u);
+}
+
+/*
+ * irq_mask: read-modify-write the source's configuration register,
+ * clearing the 0x30000000 bits.
+ */
+static void spider_mask_irq(struct irq_data *d)
+{
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
+
+ out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+}
+
+/*
+ * irq_ack: reset the edge detection logic of a source.
+ *
+ * Nothing is written for level-type interrupts, nor for sources outside
+ * 47..50, which are the only ones that can be set to edge.
+ */
+static void spider_ack_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
+	int edge_capable = (hw >= 47 && hw <= 50);
+
+	if (irqd_is_level_type(d) || !edge_capable)
+		return;
+
+	/* Perform the clear of the edge logic */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (hw & 0xf));
+}
+
+/*
+ * irq_set_type: program the sense of a source and rewrite both of its
+ * configuration registers.
+ *
+ * Only the IRQ_TYPE_SENSE_MASK bits of @type are looked at.  Sources
+ * other than 47..50 accept only IRQ_TYPE_NONE and IRQ_TYPE_LEVEL_HIGH.
+ * The 0x30000000 bits of the first register (the ones toggled by the
+ * mask/unmask callbacks) are carried over unchanged.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported sense.
+ */
+static int spider_set_irq_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int hw = irqd_to_hwirq(d);
+ void __iomem *cfg = spider_get_irq_config(pic, hw);
+ u32 old_mask;
+ u32 ic;
+
+ /* Note that only level high is supported for most interrupts */
+ if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+ (hw < 47 || hw > 50))
+ return -EINVAL;
+
+ /* Decode sense type */
+ switch(sense) {
+ case IRQ_TYPE_EDGE_RISING:
+ ic = 0x3;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ ic = 0x2;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ ic = 0x0;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_NONE:
+ ic = 0x1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Configure the source. One gross hack that was there before and
+ * that I've kept around is the priority to the BE which I set to
+ * be the same as the interrupt source number. I don't know whether
+ * that's supposed to make any kind of sense however, we'll have to
+ * decide that, but for now, I'm not changing the behaviour.
+ */
+ old_mask = in_be32(cfg) & 0x30000000u;
+ out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+ (pic->node_id << 4) | 0xe);
+ /* second register of the source: low byte carries the source number */
+ out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+ return 0;
+}
+
+/* irq_chip for spider sources; attached to each virq in spider_host_map(). */
+static struct irq_chip spider_pic = {
+ .name = "SPIDER",
+ .irq_unmask = spider_unmask_irq,
+ .irq_mask = spider_mask_irq,
+ .irq_ack = spider_ack_irq,
+ .irq_set_type = spider_set_irq_type,
+};
+
+/*
+ * irq_domain map callback: attach the domain's host_data (the spider_pic,
+ * see spider_init_one()) as chip data of @virq, install the spider
+ * irq_chip with handle_level_irq, and set the type to IRQ_TYPE_NONE.
+ * Always returns 0.
+ */
+static int spider_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+ /* Set default irq type */
+ irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+/*
+ * irq_domain xlate callback: the hardware irq is the low 6 bits of the
+ * first specifier cell and the flags are always IRQ_TYPE_LEVEL_HIGH.
+ * @intsize is not checked; intspec[0] is read unconditionally.
+ * Always returns 0.
+ */
+static int spider_host_xlate(struct irq_domain *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+ /* Spider interrupts have 2 cells, first is the interrupt source,
+ * second, well, I don't know for sure yet ... We mask the top bits
+ * because old device-trees encode a node number in there
+ */
+ *out_hwirq = intspec[0] & 0x3f;
+ *out_flags = IRQ_TYPE_LEVEL_HIGH;
+ return 0;
+}
+
+/* Domain callbacks passed to irq_domain_add_linear() in spider_init_one(). */
+static const struct irq_domain_ops spider_host_ops = {
+ .map = spider_host_map,
+ .xlate = spider_host_xlate,
+};
+
+/*
+ * Chained handler for the spider cascade interrupt.
+ *
+ * Reads the pending source from the top byte of TIR_CS, dispatches the
+ * matching virq unless the source is SPIDER_IRQ_INVALID or has no
+ * mapping, and finally issues EOI on the cascade interrupt's own chip.
+ */
+static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	struct spider_pic *pic = irq_desc_get_handler_data(desc);
+	unsigned int src = in_be32(pic->regs + TIR_CS) >> 24;
+
+	if (src != SPIDER_IRQ_INVALID) {
+		unsigned int virq = irq_linear_revmap(pic->host, src);
+
+		if (virq != NO_IRQ)
+			generic_handle_irq(virq);
+	}
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/* For hooking up the cascade we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * pieces of information though: which BE node we are connected to and
+ * whether we are connected to IOIF0 or IOIF1. Right now, we really only
+ * care about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange.
+ *
+ * Returns the virq of the cascade interrupt, or NO_IRQ if it cannot be
+ * determined.  On the interrupt-map path pic->node_id is set as a side
+ * effect.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	const u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	int cells;
+	struct device_node *iic;
+
+	/* First, we check whether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	virq = irq_of_parse_and_map(pic->host->of_node, 0);
+	if (virq)
+		return virq;
+
+	/* Now do the horrible hacks */
+	tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return NO_IRQ;
+	intsize = *tmp;
+	imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen);
+	if (imap == NULL)
+		return NO_IRQ;
+	/* The property length is reported in bytes; the map is indexed in
+	 * 32-bit cells, so convert before doing any bounds checking
+	 */
+	cells = imaplen / (int)sizeof(u32);
+	if (intsize < 0 || cells < intsize + 1)
+		return NO_IRQ;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return NO_IRQ;
+	/* Step over our own specifier and the parent phandle */
+	imap += intsize + 1;
+	cells -= intsize + 1;
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		goto fail_put;
+	intsize = *tmp;
+	/* The parent specifier must be fully contained in what is left */
+	if (intsize < 1 || cells < intsize)
+		goto fail_put;
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL)
+		goto fail_put;
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node
+	 */
+	/* Manufacture an IIC interrupt number of class 2 */
+	virq = irq_create_mapping(NULL,
+				  (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+				  (2 << IIC_IRQ_CLASS_SHIFT) |
+				  unit);
+	if (virq == NO_IRQ)
+		printk(KERN_ERR "spider_pic: failed to map cascade !\n");
+	return virq;
+
+fail_put:
+	of_node_put(iic);
+	return NO_IRQ;
+}
+
+
+/*
+ * Bring up one spider interrupt controller.
+ *
+ * @of_node: device node of the controller
+ * @chip:    index into spider_pics[]; not range-checked here
+ * @addr:    physical address of the register window (0x1000 bytes are mapped)
+ *
+ * Panics if the registers cannot be mapped or the irq domain cannot be
+ * allocated.  If no cascade interrupt can be found the function returns
+ * early, leaving the detection enable bit untouched.
+ */
+static void __init spider_init_one(struct device_node *of_node, int chip,
+ unsigned long addr)
+{
+ struct spider_pic *pic = &spider_pics[chip];
+ int i, virq;
+
+ /* Map registers */
+ pic->regs = ioremap(addr, 0x1000);
+ if (pic->regs == NULL)
+ panic("spider_pic: can't map registers !");
+
+ /* Allocate a host */
+ pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT,
+ &spider_host_ops, pic);
+ if (pic->host == NULL)
+ panic("spider_pic: can't allocate irq host !");
+
+ /* Go through all sources and disable them */
+ for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+ /* same register and bits as spider_mask_irq() */
+ void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+ out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+ }
+
+ /* do not mask any interrupts because of level */
+ out_be32(pic->regs + TIR_MSK, 0x0);
+
+ /* enable interrupt packets to be output */
+ out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+ /* Hook up the cascade interrupt to the iic and nodeid */
+ virq = spider_find_cascade_and_node(pic);
+ if (virq == NO_IRQ)
+ return;
+ /* spider_irq_cascade() retrieves pic from the handler data */
+ irq_set_handler_data(virq, pic);
+ irq_set_chained_handler(virq, spider_irq_cascade);
+
+ printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
+ pic->node_id, addr, of_node->full_name);
+
+ /* Enable the interrupt detection enable bit. Do this last! */
+ out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+/*
+ * Find and initialise every spider interrupt controller in the device
+ * tree.
+ *
+ * Nodes named "interrupt-controller" are scanned.  Those compatible with
+ * "CBEA,platform-spider-pic" take their register address from the node's
+ * first address resource; the first two compatible with
+ * "sti,platform-spider-pic" use hard-coded addresses.  At most
+ * SPIDER_CHIP_COUNT controllers are set up, since that is the size of
+ * spider_pics[]; any further ones are reported and ignored.
+ */
+void __init spider_init_IRQ(void)
+{
+	struct resource r;
+	struct device_node *dn;
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device
+	 * nodes in the right order here but that's definitely not guaranteed,
+	 * we need to get the node from the device tree instead.
+	 * There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test
+	 * the address and deduce the node-id
+	 */
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static const unsigned long hard_coded_pics[] =
+				{ 0x24000008000ul, 0x34000008000ul};
+			r.start = hard_coded_pics[chip];
+		} else
+			continue;
+
+		/* spider_init_one() indexes spider_pics[] without checking */
+		if (chip >= SPIDER_CHIP_COUNT) {
+			printk(KERN_WARNING
+			       "spider-pic: too many controllers, ignoring %s\n",
+			       dn->full_name);
+			continue;
+		}
+		spider_init_one(dn, chip++, r.start);
+	}
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spu_base.c b/kernel/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 000000000..f7af74f83
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,811 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/prom.h>
+#include <asm/kexec.h>
+
+const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_info to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function
+ */
+EXPORT_SYMBOL_GPL(force_sig_info);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep. Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignments to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+/*
+ * Invalidate all SLB entries of @spu.
+ *
+ * Done under spu->register_lock with interrupts disabled.  The
+ * slb_invalidate_all_W register is only written when
+ * MFC_STATE1_RELOCATE_MASK is set in the value from spu_mfc_sr1_get().
+ */
+void spu_invalidate_slbs(struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu->register_lock, flags);
+ if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
+ out_be64(&priv2->slb_invalidate_all_W, 0UL);
+ spin_unlock_irqrestore(&spu->register_lock, flags);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/* This is called by the MM core when a segment size is changed, to
+ * request a flush of all the SPEs using a given mm
+ *
+ * Walks spu_full_list under spu_full_list_lock (interrupts disabled) and
+ * calls spu_invalidate_slbs() on every spu whose ->mm equals @mm.
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+ struct spu *spu;
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ if (spu->mm == mm)
+ spu_invalidate_slbs(spu);
+ }
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+}
+
+/* The hack below stinks... try to do something better one of
+ * these days... Does it even work properly with NR_CPUS == 1 ?
+ *
+ * Sets the first nr bits of @mm's cpumask, where nr is NR_CPUS, or
+ * NR_CPUS + 1 when NR_CPUS is 1.
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+ int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
+ /* Global TLBIE broadcast required with SPEs. */
+ bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr);
+}
+
+/*
+ * Record @mm as the mm of @spu.  The assignment is made under
+ * spu_full_list_lock.  A non-NULL @mm is additionally marked via
+ * mm_needs_global_tlbie(); passing NULL only clears the association.
+ */
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ spu->mm = mm;
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+ if (mm)
+ mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+/* Return non-zero when mmu_psize_defs[MMU_PAGE_64K] has a non-zero shift. */
+int spu_64k_pages_available(void)
+{
+ return mmu_psize_defs[MMU_PAGE_64K].shift != 0;
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+/*
+ * Restart MFC DMA on @spu, unless SPU_CONTEXT_SWITCH_PENDING is set in
+ * spu->flags.  In that case no register is written; instead
+ * SPU_CONTEXT_FAULT_PENDING is set, followed by a full memory barrier.
+ */
+static void spu_restart_dma(struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	if (test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) {
+		set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+		mb();
+		return;
+	}
+
+	out_be64(&regs->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+/*
+ * Write @slb into SLB slot @slbe of @spu through the priv2 registers.
+ * The order of the four writes below is deliberate, see the inline
+ * comments.  The callers in this file hold spu->register_lock.
+ */
+static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",
+ __func__, slbe, slb->vsid, slb->esid);
+
+ out_be64(&priv2->slb_index_W, slbe);
+ /* set invalid before writing vsid */
+ out_be64(&priv2->slb_esid_RW, 0);
+ /* now it's safe to write the vsid */
+ out_be64(&priv2->slb_vsid_RW, slb->vsid);
+ /* setting the new esid makes the entry valid again */
+ out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+/*
+ * Handle a segment fault on @spu for effective address @ea.
+ *
+ * An SLB entry is computed with copro_calculate_slb() and loaded into the
+ * slot named by spu->slb_replace, which then advances round-robin over
+ * slots 0..7.  DMA is restarted and the slb_flt statistic is bumped.
+ *
+ * Returns 0 on success, or the non-zero result of copro_calculate_slb(),
+ * in which case nothing else is touched.
+ */
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+	struct copro_slb entry;
+	int rc;
+
+	rc = copro_calculate_slb(spu->mm, ea, &entry);
+	if (rc)
+		return rc;
+
+	spu_load_slb(spu, spu->slb_replace, &entry);
+
+	/* pick the next victim slot, wrapping after slot 7 */
+	if (++spu->slb_replace >= 8)
+		spu->slb_replace = 0;
+
+	spu_restart_dma(spu);
+	spu->stats.slb_flt++;
+
+	return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access,
+ unsigned long trap, unsigned long dsisr); //XXX
+/*
+ * Handle a storage (mapping) fault on @spu for effective address @ea.
+ *
+ * Entered with spu->register_lock held; the lock is dropped around the
+ * hash_page() call and retaken afterwards.  If hash_page() resolves a
+ * kernel-space PTE-not-found fault, DMA is restarted and we are done.
+ * Otherwise the fault is published in class_1_dar/class_1_dsisr for the
+ * duration of stop_callback(spu, 1) and cleared again.
+ *
+ * Always returns 0.
+ */
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+ int ret;
+
+ pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea);
+
+ /*
+ * Handle kernel space hash faults immediately. User hash
+ * faults need to be deferred to process context.
+ */
+ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+ (REGION_ID(ea) != USER_REGION_ID)) {
+
+ spin_unlock(&spu->register_lock);
+ ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr);
+ spin_lock(&spu->register_lock);
+
+ if (!ret) {
+ spu_restart_dma(spu);
+ return 0;
+ }
+ }
+
+ spu->class_1_dar = ea;
+ spu->class_1_dsisr = dsisr;
+
+ spu->stop_callback(spu, 1);
+
+ spu->class_1_dar = 0;
+ spu->class_1_dsisr = 0;
+
+ return 0;
+}
+
+/*
+ * Fill @slb with a kernel SLB entry covering the address @addr.
+ *
+ * The page-size encoding comes from mmu_linear_psize for addresses in
+ * KERNEL_REGION_ID and from mmu_virtual_psize for all others.  The vsid
+ * is computed for a 256M segment.
+ */
+static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
+{
+ unsigned long ea = (unsigned long)addr;
+ u64 llp;
+
+ if (REGION_ID(ea) == KERNEL_REGION_ID)
+ llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ else
+ llp = mmu_psize_defs[mmu_virtual_psize].sllp;
+
+ slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+ SLB_VSID_KERNEL | llp;
+ slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/**
+ * Given an array @slbs holding @nr_slbs SLB entries, return 1 if one of
+ * them has the same ESID bits (under ESID_MASK) as the address @new_addr,
+ * and 0 otherwise.  Only the first @nr_slbs entries are examined.
+ */
+static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,
+				void *new_addr)
+{
+	const unsigned long ea = (unsigned long)new_addr;
+	int idx = 0;
+
+	while (idx < nr_slbs) {
+		if (((slbs[idx].esid ^ ea) & ESID_MASK) == 0)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaries, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller than one segment - if not, something
+ * is seriously wrong.
+ *
+ * At most four entries are built (one per address in addrs[]); they are
+ * loaded into SLB slots 0..nr_slbs-1 under spu->register_lock.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+ void *code, int code_size)
+{
+ /* left uninitialised: only the first nr_slbs entries are ever read */
+ struct copro_slb slbs[4];
+ int i, nr_slbs = 0;
+ /* start and end addresses of both mappings */
+ void *addrs[] = {
+ lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+ code, code + code_size - 1
+ };
+
+ /* check the set of addresses, and create a new entry in the slbs array
+ * if there isn't already a SLB for that address */
+ for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+ if (__slb_present(slbs, nr_slbs, addrs[i]))
+ continue;
+
+ __spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+ nr_slbs++;
+ }
+
+ spin_lock_irq(&spu->register_lock);
+ /* Add the set of SLBs */
+ for (i = 0; i < nr_slbs; i++)
+ spu_load_slb(spu, i, &slbs[i]);
+ spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+/*
+ * Class 0 interrupt handler; @data is the struct spu registered in
+ * spu_request_irqs().
+ *
+ * Under spu->register_lock the enabled status bits are accumulated in
+ * class_0_pending and the DAR is latched in class_0_dar while
+ * stop_callback(spu, 0) runs; both are reset afterwards and the handled
+ * status bits are cleared.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask;
+
+ spu = data;
+
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 0);
+ stat = spu_int_stat_get(spu, 0) & mask;
+
+ spu->class_0_pending |= stat;
+ spu->class_0_dar = spu_mfc_dar_get(spu);
+ spu->stop_callback(spu, 0);
+ spu->class_0_pending = 0;
+ spu->class_0_dar = 0;
+
+ spu_int_stat_clear(spu, 0, stat);
+ spin_unlock(&spu->register_lock);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Class 1 interrupt handler (segment and storage faults); @data is the
+ * struct spu registered in spu_request_irqs().
+ *
+ * Status, DAR and DSISR are sampled and the status acknowledged under
+ * spu->register_lock before the faults are dispatched to
+ * __spu_trap_data_seg() and __spu_trap_data_map().  The return values of
+ * those helpers are ignored.
+ *
+ * Returns IRQ_HANDLED if any enabled status bit was set, else IRQ_NONE.
+ */
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask, dar, dsisr;
+
+ spu = data;
+
+ /* atomically read & clear class1 status. */
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 1);
+ stat = spu_int_stat_get(spu, 1) & mask;
+ dar = spu_mfc_dar_get(spu);
+ dsisr = spu_mfc_dsisr_get(spu);
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ spu_mfc_dsisr_set(spu, 0ul);
+ spu_int_stat_clear(spu, 1, stat);
+
+ pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+ dar, dsisr);
+
+ if (stat & CLASS1_SEGMENT_FAULT_INTR)
+ __spu_trap_data_seg(spu, dar);
+
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ __spu_trap_data_map(spu, dar, dsisr);
+
+ /* the two LS compare events below are recognised but nothing is done */
+ if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR)
+ ;
+
+ if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR)
+ ;
+
+ spu->class_1_dsisr = 0;
+ spu->class_1_dar = 0;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Class 2 interrupt handler (mailbox, stop/halt and DMA tag group
+ * events); @data is the struct spu registered in spu_request_irqs().
+ *
+ * Runs under spu->register_lock.  Pending mailbox interrupts are masked
+ * before all pending bits are acknowledged, then the per-event callbacks
+ * of the spu are invoked.  stats.class2_intr is incremented on every
+ * call, even when no enabled bit was pending.
+ *
+ * Returns IRQ_HANDLED if any enabled status bit was set, else IRQ_NONE.
+ */
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat;
+ unsigned long mask;
+ const int mailbox_intrs =
+ CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+ spu = data;
+ spin_lock(&spu->register_lock);
+ stat = spu_int_stat_get(spu, 2);
+ mask = spu_int_mask_get(spu, 2);
+ /* ignore interrupts we're not waiting for */
+ stat &= mask;
+ /* mailbox interrupts are level triggered. mask them now before
+ * acknowledging */
+ if (stat & mailbox_intrs)
+ spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+ /* acknowledge all interrupts before the callbacks */
+ spu_int_stat_clear(spu, 2, stat);
+
+ pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+ if (stat & CLASS2_MAILBOX_INTR)
+ spu->ibox_callback(spu);
+
+ /* stop and halt are both reported through stop_callback(spu, 2) */
+ if (stat & CLASS2_SPU_STOP_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_HALT_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+ spu->mfc_callback(spu);
+
+ if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+ spu->wbox_callback(spu);
+
+ spu->stats.class2_intr++;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Request the class 0, 1 and 2 interrupts of @spu.
+ *
+ * Entries of spu->irqs[] equal to NO_IRQ are skipped.  Each requested
+ * line is named "speNN.C" (NN = spu->number, C = class) using the
+ * irq_c0/irq_c1/irq_c2 buffers of the spu, and gets @spu as its dev_id.
+ *
+ * Returns 0 on success.  If a request fails, the lines requested so far
+ * are freed again and the request_irq() error is returned.
+ */
+static int spu_request_irqs(struct spu *spu)
+{
+ int ret = 0;
+
+ if (spu->irqs[0] != NO_IRQ) {
+ snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+ spu->number);
+ ret = request_irq(spu->irqs[0], spu_irq_class_0,
+ 0, spu->irq_c0, spu);
+ if (ret)
+ goto bail0;
+ }
+ if (spu->irqs[1] != NO_IRQ) {
+ snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+ spu->number);
+ ret = request_irq(spu->irqs[1], spu_irq_class_1,
+ 0, spu->irq_c1, spu);
+ if (ret)
+ goto bail1;
+ }
+ if (spu->irqs[2] != NO_IRQ) {
+ snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+ spu->number);
+ ret = request_irq(spu->irqs[2], spu_irq_class_2,
+ 0, spu->irq_c2, spu);
+ if (ret)
+ goto bail2;
+ }
+ return 0;
+
+ /* unwind in reverse order of acquisition */
+bail2:
+ if (spu->irqs[1] != NO_IRQ)
+ free_irq(spu->irqs[1], spu);
+bail1:
+ if (spu->irqs[0] != NO_IRQ)
+ free_irq(spu->irqs[0], spu);
+bail0:
+ return ret;
+}
+
+static void spu_free_irqs(struct spu *spu)
+{
+	int class;
+
+	/* Release the handler of every interrupt class that has a mapping. */
+	for (class = 0; class <= 2; class++) {
+		if (spu->irqs[class] == NO_IRQ)
+			continue;
+		free_irq(spu->irqs[class], spu);
+	}
+}
+
+void spu_init_channels(struct spu *spu) /* Reset SPU channel data and channel counts through the priv2 register area. */
+{
+ static const struct {
+ unsigned channel;
+ unsigned count;
+ } zero_list[] = { /* {channel, number of zero data words to write} */
+ { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+ { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+ }, count_list[] = { /* {channel, value written to its channel count} */
+ { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+ { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+ { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+ };
+ struct spu_priv2 __iomem *priv2;
+ int i;
+
+ priv2 = spu->priv2;
+
+ /* initialize all channel data to zero */
+ for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+ int count;
+
+ out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); /* select the channel first ... */
+ for (count = 0; count < zero_list[i].count; count++)
+ out_be64(&priv2->spu_chnldata_RW, 0); /* ... then write its data word(s) */
+ }
+
+ /* initialize channel counts to meaningful values */
+ for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+ out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+ out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+ }
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+static struct bus_type spu_subsys = { /* bus assigned to each SPU device in spu_create_dev() and registered in init_spu_base() */
+ .name = "spu",
+ .dev_name = "spu",
+};
+
+/*
+ * Create the sysfs attribute @attr on every SPU device in spu_full_list.
+ *
+ * Creation is attempted on all SPUs even when one of them fails, so the
+ * attribute still appears wherever it can.  Returns 0 if every file was
+ * created, otherwise the first error reported by device_create_file()
+ * (previously the result was discarded and 0 was always returned).
+ */
+int spu_add_dev_attr(struct device_attribute *attr)
+{
+	struct spu *spu;
+	int rc = 0;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		int err = device_create_file(&spu->dev, attr);
+
+		/* remember only the first failure, but keep going */
+		if (err && !rc)
+			rc = err;
+	}
+	mutex_unlock(&spu_full_list_mutex);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
+
+int spu_add_dev_attr_group(struct attribute_group *attrs) /* Create sysfs group @attrs on every SPU; returns 0 or the sysfs_create_group() error. */
+{
+ struct spu *spu;
+ int rc = 0;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ rc = sysfs_create_group(&spu->dev.kobj, attrs);
+
+ /* we're in trouble here, but try unwinding anyway */
+ if (rc) {
+ printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+ __func__, attrs->name);
+
+ list_for_each_entry_continue_reverse(spu, /* walk back over the SPUs handled before the failing one */
+ &spu_full_list, full_list)
+ sysfs_remove_group(&spu->dev.kobj, attrs);
+ break;
+ }
+ }
+
+ mutex_unlock(&spu_full_list_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
+
+
+void spu_remove_dev_attr(struct device_attribute *attr) /* Remove sysfs attribute @attr from every SPU device in spu_full_list. */
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex); /* list is walked under the same mutex used when adding attributes */
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ device_remove_file(&spu->dev, attr);
+ mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
+
+void spu_remove_dev_attr_group(struct attribute_group *attrs) /* Remove sysfs group @attrs from every SPU device in spu_full_list. */
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ sysfs_remove_group(&spu->dev.kobj, attrs);
+ mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
+
+static int spu_create_dev(struct spu *spu) /* Register spu->dev on spu_subsys and link it to its NUMA node in sysfs. */
+{
+ int ret;
+
+ spu->dev.id = spu->number;
+ spu->dev.bus = &spu_subsys;
+ ret = device_register(&spu->dev); /* NOTE(review): device_register() docs ask for put_device() on failure - confirm whether that matters here */
+ if (ret) {
+ printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+ spu->number);
+ return ret;
+ }
+
+ sysfs_add_device_to_node(&spu->dev, spu->node); /* return value is not checked */
+
+ return 0;
+}
+
+static int __init create_spu(void *data) /* Allocate and bring up one SPU; @data is handed unchanged to spu_create_spu(). Returns 0 or a negative errno. */
+{
+ struct spu *spu;
+ int ret;
+ static int number; /* next SPU number, handed out under spu_lock */
+ unsigned long flags;
+
+ ret = -ENOMEM;
+ spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+ if (!spu)
+ goto out;
+
+ spu->alloc_state = SPU_FREE;
+
+ spin_lock_init(&spu->register_lock);
+ spin_lock(&spu_lock);
+ spu->number = number++;
+ spin_unlock(&spu_lock);
+
+ ret = spu_create_spu(spu, data);
+
+ if (ret)
+ goto out_free; /* nothing to destroy yet, just free the allocation */
+
+ spu_mfc_sdr_setup(spu);
+ spu_mfc_sr1_set(spu, 0x33);
+ ret = spu_request_irqs(spu);
+ if (ret)
+ goto out_destroy;
+
+ ret = spu_create_dev(spu);
+ if (ret)
+ goto out_free_irqs;
+
+ mutex_lock(&cbe_spu_info[spu->node].list_mutex); /* publish on the per-node list ... */
+ list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+ cbe_spu_info[spu->node].n_spus++;
+ mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+ mutex_lock(&spu_full_list_mutex); /* ... and on the global list, which is guarded by both a mutex and a spinlock */
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ list_add(&spu->full_list, &spu_full_list);
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+ mutex_unlock(&spu_full_list_mutex);
+
+ spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+ spu->stats.tstamp = ktime_get_ns();
+
+ INIT_LIST_HEAD(&spu->aff_list);
+
+ goto out; /* success: ret is 0 here */
+
+out_free_irqs:
+ spu_free_irqs(spu);
+out_destroy:
+ spu_destroy_spu(spu);
+out_free:
+ kfree(spu);
+out:
+ return ret;
+}
+
+static const char *spu_state_names[] = { /* indexed by spu->stats.util_state in spu_stat_show(); presumably ordered like enum spu_utilization_state - verify */
+ "user", "system", "iowait", "idle"
+};
+
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	unsigned long long total_ns = spu->stats.times[state];
+
+	/*
+	 * The accumulated counters are not refreshed while the spu sits
+	 * in one state (idle, or context stopped), so when @state is the
+	 * state the spu is currently in, add the time elapsed since the
+	 * last recorded timestamp.
+	 */
+	if (spu->stats.util_state != state)
+		return total_ns / NSEC_PER_MSEC;
+
+	total_ns += ktime_get_ns() - spu->stats.tstamp;
+	return total_ns / NSEC_PER_MSEC;
+}
+
+
+static ssize_t spu_stat_show(struct device *dev, /* sysfs show() for the per-SPU "stat" attribute */
+ struct device_attribute *attr, char *buf)
+{
+ struct spu *spu = container_of(dev, struct spu, dev);
+
+ return sprintf(buf, "%s %llu %llu %llu %llu " /* one line: state name, four times in ms, then eight counters */
+ "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+ spu_state_names[spu->stats.util_state],
+ spu_acct_time(spu, SPU_UTIL_USER),
+ spu_acct_time(spu, SPU_UTIL_SYSTEM),
+ spu_acct_time(spu, SPU_UTIL_IOWAIT),
+ spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+ spu->stats.vol_ctx_switch,
+ spu->stats.invol_ctx_switch,
+ spu->stats.slb_flt,
+ spu->stats.hash_flt,
+ spu->stats.min_flt,
+ spu->stats.maj_flt,
+ spu->stats.class2_intr,
+ spu->stats.libassist);
+}
+
+static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); /* read-only; defines dev_attr_stat used by init_spu_base() */
+
+#ifdef CONFIG_KEXEC
+
+struct crash_spu_info { /* per-SPU register snapshot filled in by crash_kexec_stop_spus() */
+ struct spu *spu; /* NULL for slots with no registered SPU */
+ u32 saved_spu_runcntl_RW;
+ u32 saved_spu_status_R;
+ u32 saved_spu_npc_RW;
+ u64 saved_mfc_sr1_RW;
+ u64 saved_mfc_dar;
+ u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; /* indexed by spu->number */
+
+static void crash_kexec_stop_spus(void) /* Crash-shutdown hook: snapshot each registered SPU's registers, then clear its master run control bit. */
+{
+ struct spu *spu;
+ int i;
+ u64 tmp;
+
+ for (i = 0; i < CRASH_NUM_SPUS; i++) {
+ if (!crash_spu_info[i].spu)
+ continue;
+
+ spu = crash_spu_info[i].spu;
+
+ crash_spu_info[i].saved_spu_runcntl_RW =
+ in_be32(&spu->problem->spu_runcntl_RW);
+ crash_spu_info[i].saved_spu_status_R =
+ in_be32(&spu->problem->spu_status_R);
+ crash_spu_info[i].saved_spu_npc_RW =
+ in_be32(&spu->problem->spu_npc_RW);
+
+ crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+ crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+ tmp = spu_mfc_sr1_get(spu);
+ crash_spu_info[i].saved_mfc_sr1_RW = tmp; /* keep the original SR1 before modifying it */
+
+ tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, tmp);
+
+ __delay(200);
+ }
+}
+
+/*
+ * Record every SPU on @list in crash_spu_info[] (indexed by spu->number)
+ * and register crash_kexec_stop_spus() as a crash shutdown handler.
+ * SPUs whose number does not fit in the table are skipped with a warning.
+ */
+static void crash_register_spus(struct list_head *list)
+{
+	struct spu *spu;
+	int ret;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+			continue;
+
+		crash_spu_info[spu->number].spu = spu;
+	}
+
+	ret = crash_shutdown_register(&crash_kexec_stop_spus);
+	if (ret)
+		/* terminate the message so it is not merged with the next log line */
+		printk(KERN_ERR "Could not register SPU crash handler\n");
+}
+
+#else
+static inline void crash_register_spus(struct list_head *list) /* no-op stub used when CONFIG_KEXEC is not set */
+{
+}
+#endif
+
+static void spu_shutdown(void) /* syscore shutdown hook: free the IRQs of every SPU and destroy it */
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ spu_free_irqs(spu); /* handlers are removed before the SPU is destroyed */
+ spu_destroy_spu(spu);
+ }
+ mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = { /* registered from init_spu_base(); only the shutdown hook is set */
+ .shutdown = spu_shutdown,
+};
+
+static int __init init_spu_base(void) /* Module init: set up per-node lists, register the spu subsystem and enumerate all SPUs. */
+{
+ int i, ret = 0;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ mutex_init(&cbe_spu_info[i].list_mutex);
+ INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+ }
+
+ if (!spu_management_ops) /* no management backend: succeed without creating anything (ret is 0) */
+ goto out;
+
+ /* create system subsystem for spus */
+ ret = subsys_system_register(&spu_subsys, NULL);
+ if (ret)
+ goto out;
+
+ ret = spu_enumerate_spus(create_spu); /* ret < 0 is handled as an error; a positive value is passed on as the logo count below */
+
+ if (ret < 0) {
+ printk(KERN_WARNING "%s: Error initializing spus\n",
+ __func__);
+ goto out_unregister_subsys;
+ }
+
+ if (ret > 0)
+ fb_append_extra_logo(&logo_spe_clut224, ret);
+
+ mutex_lock(&spu_full_list_mutex);
+ xmon_register_spus(&spu_full_list);
+ crash_register_spus(&spu_full_list);
+ mutex_unlock(&spu_full_list_mutex);
+ spu_add_dev_attr(&dev_attr_stat); /* return value is not checked */
+ register_syscore_ops(&spu_syscore_ops);
+
+ spu_init_affinity();
+
+ return 0;
+
+ out_unregister_subsys:
+ bus_unregister(&spu_subsys);
+ out:
+ return ret;
+}
+module_init(init_spu_base);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/kernel/arch/powerpc/platforms/cell/spu_callbacks.c b/kernel/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 000000000..a494028b2
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,74 @@
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ * and we can't let them execute ever:
+ * restart_syscall, exit, fork, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ * uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ * we don't want an SPU to:
+ * reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ * linked into the kernel. Unfortunately, the cond_syscall
+ * helper does not work here as it does not add the necessary
+ * opd symbols:
+ * mbind, mq_open, ipc, ...
+ */
+
+static void *spu_syscall_table[] = { /* built by including asm/systbl.h with the macros below */
+#define SYSCALL(func) sys_ni_syscall, /* the non-_SPU entry kinds all expand to sys_ni_syscall */
+#define COMPAT_SYS(func) sys_ni_syscall,
+#define PPC_SYS(func) sys_ni_syscall,
+#define OLDSYS(func) sys_ni_syscall,
+#define SYS32ONLY(func) sys_ni_syscall,
+#define PPC64ONLY(func) sys_ni_syscall,
+#define SYSX(f, f3264, f32) sys_ni_syscall,
+
+#define SYSCALL_SPU(func) sys_##func, /* only the *_SPU kinds expand to a real handler */
+#define COMPAT_SYS_SPU(func) sys_##func,
+#define PPC_SYS_SPU(func) ppc_##func,
+#define SYSX_SPU(f, f3264, f32) f,
+
+#include <asm/systbl.h>
+};
+
+/*
+ * Dispatch a system call requested by an SPU.
+ *
+ * @s->nr_ret selects the entry in spu_syscall_table and @s->parm[0..5]
+ * are passed as the six arguments.  Returns the handler's result, or
+ * -ENOSYS when the syscall number is outside the table.
+ */
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		/* newline added so the message is a complete log line */
+		pr_debug("%s: invalid syscall #%lld\n", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	pr_debug("SPU-syscall "
+		 "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n",
+		 syscall,
+		 s->nr_ret,
+		 s->parm[0], s->parm[1], s->parm[2],
+		 s->parm[3], s->parm[4], s->parm[5]);
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/kernel/arch/powerpc/platforms/cell/spu_manage.c b/kernel/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 000000000..c3327f3d8
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,555 @@
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+
+struct device_node *spu_devnode(struct spu *spu) /* Accessor for the device-tree node stored in spu->devnode; no reference is taken. */
+{
+ return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+/*
+ * Return the unit number of the SPE described by device-tree node @spe.
+ *
+ * The "physical-id", "unit-id" and "reg" properties are tried in that
+ * order; the first one that exists and is exactly 4 bytes long wins.
+ * Returns 0 when none of them qualifies.
+ *
+ * of_get_property() only writes the length when the property exists, so
+ * the pointer is checked before the length is looked at; otherwise an
+ * uninitialised or stale length could lead to a NULL dereference.
+ */
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const unsigned int *prop;
+	int proplen;
+
+	/* new device trees should provide the physical-id attribute */
+	prop = of_get_property(spe, "physical-id", &proplen);
+	if (prop && proplen == 4)
+		return (u64)*prop;
+
+	/* celleb device tree provides the unit-id */
+	prop = of_get_property(spe, "unit-id", &proplen);
+	if (prop && proplen == 4)
+		return (u64)*prop;
+
+	/* legacy device trees provide the id in the reg attribute */
+	prop = of_get_property(spe, "reg", &proplen);
+	if (prop && proplen == 4)
+		return (u64)*prop;
+
+	return 0;
+}
+
+static void spu_unmap(struct spu *spu) /* Undo the ioremap()s of the SPU's register areas and local store. */
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) /* the map functions in this file only map priv1 when FW_FEATURE_LPAR is absent */
+ iounmap(spu->priv1);
+ iounmap(spu->priv2);
+ iounmap(spu->problem);
+ iounmap((__force u8 __iomem *)spu->local_store); /* local_store is kept as a plain pointer, cast back for iounmap() */
+}
+
+static int __init spu_map_interrupts_old(struct spu *spu, /* Legacy irq mapping based on the "isrc" property of @np. */
+ struct device_node *np)
+{
+ unsigned int isrc;
+ const u32 *tmp;
+ int nid;
+
+ /* Get the interrupt source unit from the device-tree */
+ tmp = of_get_property(np, "isrc", NULL);
+ if (!tmp)
+ return -ENODEV;
+ isrc = tmp[0];
+
+ tmp = of_get_property(np->parent->parent, "node-id", NULL); /* looked up on the grandparent node */
+ if (!tmp) {
+ printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+ nid = spu->node; /* fall back to the node id already stored in @spu */
+ } else
+ nid = tmp[0];
+
+ /* Add the node number */
+ isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+ /* Now map interrupts of all 3 classes */
+ spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+ spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+ spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+ /* Right now, we only fail if class 2 failed */
+ return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
+static void __iomem * __init spu_map_prop_old(struct spu *spu, /* ioremap the region described by legacy property @name of @n; NULL on failure. @spu is unused. */
+ struct device_node *n,
+ const char *name)
+{
+ const struct address_prop { /* expected layout of the property: address followed by length */
+ unsigned long address;
+ unsigned int len;
+ } __attribute__((packed)) *prop;
+ int proplen;
+
+ prop = of_get_property(n, name, &proplen);
+ if (prop == NULL || proplen != sizeof (struct address_prop)) /* reject a missing property or one of unexpected size */
+ return NULL;
+
+ return ioremap(prop->address, prop->len);
+}
+
+static int __init spu_map_device_old(struct spu *spu) /* Map local store, problem, priv2 (and priv1) from legacy properties; 0 or -ENODEV. */
+{
+ struct device_node *node = spu->devnode;
+ const char *prop;
+ int ret;
+
+ ret = -ENODEV; /* every failure below returns -ENODEV */
+ spu->name = of_get_property(node, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ prop = of_get_property(node, "local-store", NULL);
+ if (!prop)
+ goto out;
+ spu->local_store_phys = *(unsigned long *)prop; /* first word of the property is taken as the physical address */
+
+ /* we use local store as ram, not io memory */
+ spu->local_store = (void __force *)
+ spu_map_prop_old(spu, node, "local-store");
+ if (!spu->local_store)
+ goto out;
+
+ prop = of_get_property(node, "problem", NULL);
+ if (!prop)
+ goto out_unmap;
+ spu->problem_phys = *(unsigned long *)prop;
+
+ spu->problem = spu_map_prop_old(spu, node, "problem");
+ if (!spu->problem)
+ goto out_unmap;
+
+ spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+ if (!spu->priv2)
+ goto out_unmap;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) { /* priv1 is only mapped when FW_FEATURE_LPAR is absent */
+ spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+ if (!spu->priv1)
+ goto out_unmap;
+ }
+
+ ret = 0;
+ goto out;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ return ret;
+}
+
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) /* Map the three interrupts of @np into spu->irqs[]; 0 or a negative errno. */
+{
+ struct of_phandle_args oirq;
+ int ret;
+ int i;
+
+ for (i=0; i < 3; i++) {
+ ret = of_irq_parse_one(np, i, &oirq);
+ if (ret) {
+ pr_debug("spu_new: failed to get irq %d\n", i);
+ goto err;
+ }
+ ret = -EINVAL; /* error code used if the mapping below fails */
+ pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0],
+ oirq.np->full_name);
+ spu->irqs[i] = irq_create_of_mapping(&oirq);
+ if (spu->irqs[i] == NO_IRQ) {
+ pr_debug("spu_new: failed to map it !\n");
+ goto err;
+ }
+ }
+ return 0;
+
+err:
+ pr_debug("failed to map irq %x for spu %s\n", *oirq.args, /* NOTE(review): oirq may be unset if the first parse failed - confirm */
+ spu->name);
+ for (; i >= 0; i--) { /* dispose of whatever was mapped so far, including slot i */
+ if (spu->irqs[i] != NO_IRQ)
+ irq_dispose_mapping(spu->irqs[i]);
+ }
+ return ret;
+}
+
+static int spu_map_resource(struct spu *spu, int nr, /* ioremap resource @nr of the SPU's node into *@virt; optionally report its start in *@phys. */
+ void __iomem** virt, unsigned long *phys)
+{
+ struct device_node *np = spu->devnode;
+ struct resource resource = { };
+ unsigned long len;
+ int ret;
+
+ ret = of_address_to_resource(np, nr, &resource);
+ if (ret)
+ return ret;
+ if (phys) /* @phys may be NULL when the caller does not need the physical address */
+ *phys = resource.start;
+ len = resource_size(&resource);
+ *virt = ioremap(resource.start, len);
+ if (!*virt)
+ return -EINVAL;
+ return 0;
+}
+
+static int __init spu_map_device(struct spu *spu) /* Map the SPU's resources 0..3 (local store, problem, priv2, priv1); 0 or a negative errno. */
+{
+ struct device_node *np = spu->devnode;
+ int ret = -ENODEV;
+
+ spu->name = of_get_property(np, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+ &spu->local_store_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 0\n",
+ np->full_name);
+ goto out;
+ }
+ ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+ &spu->problem_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 1\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+ if (ret) {
+ pr_debug("spu_new: failed to map %s resource 2\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) /* priv1 (resource 3) is only mapped when FW_FEATURE_LPAR is absent */
+ ret = spu_map_resource(spu, 3,
+ (void __iomem**)&spu->priv1, NULL);
+ if (ret) { /* ret is still 0 from resource 2 when the priv1 mapping was skipped */
+ pr_debug("spu_new: failed to map %s resource 3\n",
+ np->full_name);
+ goto out_unmap;
+ }
+ pr_debug("spu_new: %s maps:\n", np->full_name);
+ pr_debug(" local store : 0x%016lx -> 0x%p\n",
+ spu->local_store_phys, spu->local_store);
+ pr_debug(" problem state : 0x%016lx -> 0x%p\n",
+ spu->problem_phys, spu->problem);
+ pr_debug(" priv2 : 0x%p\n", spu->priv2);
+ pr_debug(" priv1 : 0x%p\n", spu->priv1);
+
+ return 0;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+ return ret;
+}
+
+/*
+ * Call @fn once for every device-tree node of type "spe".
+ *
+ * Returns the number of nodes handled, -ENODEV if there is no such node,
+ * or the first non-zero value returned by @fn (iteration stops there).
+ */
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	int ret;
+	struct device_node *node;
+	unsigned int n = 0;
+
+	ret = -ENODEV;
+	for (node = of_find_node_by_type(NULL, "spe");
+			node; node = of_find_node_by_type(node, "spe")) {
+		ret = fn(node);
+		if (ret) {
+			printk(KERN_WARNING "%s: Error initializing %s\n",
+				__func__, node->name);
+			/*
+			 * of_find_node_by_type() returned @node with a
+			 * reference held; it is normally dropped by the
+			 * next lookup, so drop it by hand when bailing out.
+			 */
+			of_node_put(node);
+			break;
+		}
+		n++;
+	}
+	return ret ? ret : n;
+}
+
+/*
+ * Fill in @spu from the device-tree node passed in @data: take a
+ * reference on the node, determine unit number and NUMA node, then map
+ * the register areas and the interrupts.  For both mappings the new
+ * style is tried first and the legacy style second; a one-time warning
+ * is printed the first time a fallback is needed.
+ *
+ * Returns 0 on success or a negative errno.  On failure the node
+ * reference taken here is dropped again, since of_destroy_spu() - the
+ * only other place that puts it - is not reached for an SPU whose
+ * creation failed.
+ */
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %s on node %d ignored,"
+			" node number too big\n", spe->full_name, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out_put;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __func__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out_put;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __func__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	return 0;
+
+out_unmap:
+	spu_unmap(spu);
+out_put:
+	/* balance the of_node_get() above */
+	of_node_put(spu->devnode);
+	return ret;
+}
+
+static int of_destroy_spu(struct spu *spu) /* Undo of_create_spu(): unmap the register areas and drop the node reference. Always returns 0. */
+{
+ spu_unmap(spu);
+ of_node_put(spu->devnode); /* balances the of_node_get() in of_create_spu() */
+ return 0;
+}
+
+static void enable_spu_by_master_run(struct spu_context *ctx) /* .enable_spu hook: forwards to the context's master_start op */
+{
+ ctx->ops->master_start(ctx);
+}
+
+static void disable_spu_by_master_run(struct spu_context *ctx) /* .disable_spu hook: forwards to the context's master_stop op */
+{
+ ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20 */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; /* "reg" values in the order SPUs are chained by init_affinity_qs20_harcoded() */
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; /* indexed by "reg" value: becomes has_mem_affinity */
+
+/*
+ * Find the SPU on @node whose device-tree "reg" property starts with
+ * @reg.  Returns NULL when there is no such SPU.  SPUs whose node has
+ * no "reg" property are skipped instead of dereferencing the NULL
+ * pointer returned by of_get_property().
+ */
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+	const u32 *spu_reg;
+
+	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+		spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+		if (spu_reg && *spu_reg == reg)
+			return spu;
+	}
+	return NULL;
+}
+
+static void init_affinity_qs20_harcoded(void) /* Build each node's aff_list chain from the fixed qs20 tables instead of device-tree data. */
+{
+ int node, i;
+ struct spu *last_spu, *spu;
+ u32 reg;
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ last_spu = NULL;
+ for (i = 0; i < QS20_SPES_PER_BE; i++) {
+ reg = qs20_reg_idxs[i];
+ spu = spu_lookup_reg(node, reg);
+ if (!spu) /* no SPU with this "reg" value on this node */
+ continue;
+ spu->has_mem_affinity = qs20_reg_memory[reg];
+ if (last_spu) /* the first SPU found only starts the chain */
+ list_add_tail(&spu->aff_list,
+ &last_spu->aff_list);
+ last_spu = spu;
+ }
+ }
+}
+
+static int of_has_vicinity(void) /* Returns 1 if any device-tree node of type "spe" has a "vicinity" property, else 0. */
+{
+ struct device_node *dn;
+
+ for_each_node_by_type(dn, "spe") {
+ if (of_find_property(dn, "vicinity", NULL)) {
+ of_node_put(dn); /* put dn before leaving the loop early (presumably the reference held by the iterator) */
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static struct spu *devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *candidate;
+	struct spu *match = NULL;
+
+	/* Look for the SPU on node @cbe that is backed by device node @dn. */
+	list_for_each_entry(candidate, &cbe_spu_info[cbe].spus, cbe_list) {
+		if (spu_devnode(candidate) != dn)
+			continue;
+		match = candidate;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Find an SPU on node @cbe, other than the one backed by @avoid, whose
+ * "vicinity" property lists the phandle of @target.  Returns NULL when
+ * there is none.
+ *
+ * SPUs without a "vicinity" property are skipped: of_get_property()
+ * returns NULL for them and leaves the length untouched, so walking the
+ * list would use a NULL pointer with an uninitialised or stale length.
+ */
+static struct spu *
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		if (spu_dn == avoid)
+			continue;
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		if (!vic_handles)
+			continue;
+		for (i=0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == target->phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Chain the SPUs of node @cbe on their aff_list by following the
+ * "vicinity" phandles in the device tree, starting from the first SPU
+ * on the node's list.  SPUs that are linked through a "mic-tm" node get
+ * has_mem_affinity set.
+ *
+ * Fixes over the previous version:
+ *  - the reference returned by of_find_node_by_phandle() is dropped on
+ *    every path;
+ *  - an SPU without a "vicinity" property ends the walk instead of
+ *    being read through a NULL pointer;
+ *  - a "spe" neighbour that is not on this node's list is skipped
+ *    instead of passing a NULL spu to list_add_tail().
+ */
+static void init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	const char *name;
+	int lenp, i, added;
+
+	/* only dereferenced inside the loop, i.e. when n_spus > 1 */
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+								cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+		if (!vic_handles)
+			break;	/* nothing to follow; later rounds would not differ */
+
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (typically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			spu = NULL;
+
+			/* a neighbour might be spe, mic-tm, or bif0 */
+			name = of_get_property(vic_dn, "name", NULL);
+			if (name && strcmp(name, "spe") == 0) {
+				spu = devnode_spu(cbe, vic_dn);
+				if (spu)
+					avoid_ph = last_spu_dn->phandle;
+			} else if (name) {
+				/*
+				 * "mic-tm" and "bif0" nodes do not have
+				 * vicinity property. So we need to find the
+				 * spe which has vic_dn as neighbour, but
+				 * skipping the one we came from (last_spu_dn)
+				 */
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+				if (spu) {
+					if (!strcmp(name, "mic-tm")) {
+						last_spu->has_mem_affinity = 1;
+						spu->has_mem_affinity = 1;
+					}
+					avoid_ph = vic_dn->phandle;
+				}
+			}
+
+			/* done with the neighbour node (and with @name) */
+			of_node_put(vic_dn);
+
+			if (!spu)
+				continue;
+
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+static void init_affinity_fw(void)
+{
+	int cbe = 0;
+
+	/* Build the affinity chain of every possible node, one at a time. */
+	while (cbe < MAX_NUMNODES) {
+		init_affinity_node(cbe);
+		cbe++;
+	}
+}
+
+static int __init init_affinity(void)
+{
+	long root;
+
+	/* Prefer the "vicinity" information from the device tree. */
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+		return 0;
+	}
+
+	/* Otherwise only machines matching the check below get the fixed table. */
+	root = of_get_flat_dt_root();
+	if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) {
+		printk("No affinity configuration found\n");
+		return 0;
+	}
+
+	init_affinity_qs20_harcoded();
+	return 0;
+}
+
+const struct spu_management_ops spu_management_of_ops = { /* device-tree ("of") based implementation of the SPU management hooks */
+ .enumerate_spus = of_enumerate_spus,
+ .create_spu = of_create_spu,
+ .destroy_spu = of_destroy_spu,
+ .enable_spu = enable_spu_by_master_run,
+ .disable_spu = disable_spu_by_master_run,
+ .init_affinity = init_affinity,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spu_notify.c b/kernel/arch/powerpc/platforms/cell/spu_notify.c
new file mode 100644
index 000000000..afdf857c3
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_notify.c
@@ -0,0 +1,68 @@
+/*
+ * Move OProfile dependencies from spufs module to the kernel so it
+ * can run on non-cell PPC.
+ *
+ * Copyright (C) IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/notifier.h>
+#include <asm/spu.h>
+#include "spufs/spufs.h"
+
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); /* chain that receives SPU context-switch events */
+
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx) /* Notify listeners; the event value is ctx->object_id, or 0 when @ctx is NULL. */
+{
+ blocking_notifier_call_chain(&spu_switch_notifier,
+ ctx ? ctx->object_id : 0, spu);
+}
+EXPORT_SYMBOL_GPL(spu_switch_notify);
+
+int spu_switch_event_register(struct notifier_block *n)
+{
+	int err = blocking_notifier_chain_register(&spu_switch_notifier, n);
+
+	if (err)
+		return err;
+
+	/* registration worked: call notify_spus_active() for the new listener */
+	notify_spus_active();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
+
+int spu_switch_event_unregister(struct notifier_block *n) /* Remove @n from the switch notifier chain; returns the unregister result. */
+{
+ return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
+}
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
+
+void spu_set_profile_private_kref(struct spu_context *ctx, /* Store a profiler-owned kref and its release callback in @ctx. */
+ struct kref *prof_info_kref,
+ void (* prof_info_release) (struct kref *kref))
+{
+ ctx->prof_priv_kref = prof_info_kref; /* pointers are only stored; no reference is taken here */
+ ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void *spu_get_profile_private_kref(struct spu_context *ctx) /* Return the kref pointer stored by spu_set_profile_private_kref(). */
+{
+ return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
diff --git a/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 000000000..66d33724f
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,180 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 current_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+
+	/* read-modify-write: keep only the bits that are also set in @mask */
+	current_mask &= mask;
+	out_be64(&spu->priv1->int_mask_RW[class], current_mask);
+}
+
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 current_mask = in_be64(&spu->priv1->int_mask_RW[class]);
+
+	/* read-modify-write: additionally set the bits given in @mask */
+	current_mask |= mask;
+	out_be64(&spu->priv1->int_mask_RW[class], current_mask);
+}
+
+static void int_mask_set(struct spu *spu, int class, u64 mask) /* overwrite the interrupt mask register of @class */
+{
+ out_be64(&spu->priv1->int_mask_RW[class], mask);
+}
+
+static u64 int_mask_get(struct spu *spu, int class) /* read the interrupt mask register of @class */
+{
+ return in_be64(&spu->priv1->int_mask_RW[class]);
+}
+
+static void int_stat_clear(struct spu *spu, int class, u64 stat) /* write @stat to the interrupt status register of @class */
+{
+ out_be64(&spu->priv1->int_stat_RW[class], stat);
+}
+
+static u64 int_stat_get(struct spu *spu, int class) /* read the interrupt status register of @class */
+{
+ return in_be64(&spu->priv1->int_stat_RW[class]);
+}
+
+static void cpu_affinity_set(struct spu *spu, int cpu) /* Point the SPU's interrupt routing register at @cpu. */
+{
+ u64 target;
+ u64 route;
+
+ if (nr_cpus_node(spu->node)) { /* only checked when the SPU's node has cpus at all */
+ const struct cpumask *spumask = cpumask_of_node(spu->node),
+ *cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+ if (!cpumask_intersects(spumask, cpumask)) /* leave routing untouched if @cpu's node shares no cpu with the SPU's node */
+ return;
+ }
+
+ target = iic_get_target_id(cpu);
+ route = target << 48 | target << 32 | target << 16; /* same target id in three 16-bit fields; presumably one per interrupt class - verify */
+ out_be64(&spu->priv1->int_route_RW, route);
+}
+
+static u64 mfc_dar_get(struct spu *spu) /* read priv1 mfc_dar_RW */
+{
+ return in_be64(&spu->priv1->mfc_dar_RW);
+}
+
+static u64 mfc_dsisr_get(struct spu *spu) /* read priv1 mfc_dsisr_RW */
+{
+ return in_be64(&spu->priv1->mfc_dsisr_RW);
+}
+
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr) /* write priv1 mfc_dsisr_RW */
+{
+ out_be64(&spu->priv1->mfc_dsisr_RW, dsisr);
+}
+
+static void mfc_sdr_setup(struct spu *spu) /* copy the CPU's SDR1 special purpose register into priv1 mfc_sdr_RW */
+{
+ out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
+}
+
+static void mfc_sr1_set(struct spu *spu, u64 sr1) /* write priv1 mfc_sr1_RW */
+{
+ out_be64(&spu->priv1->mfc_sr1_RW, sr1);
+}
+
+static u64 mfc_sr1_get(struct spu *spu) /* read priv1 mfc_sr1_RW */
+{
+ return in_be64(&spu->priv1->mfc_sr1_RW);
+}
+
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) /* write priv1 mfc_tclass_id_RW */
+{
+ out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id);
+}
+
+static u64 mfc_tclass_id_get(struct spu *spu) /* read priv1 mfc_tclass_id_RW */
+{
+ return in_be64(&spu->priv1->mfc_tclass_id_RW);
+}
+
+static void tlb_invalidate(struct spu *spu) /* write 0 to priv1 tlb_invalidate_entry_W */
+{
+ out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul);
+}
+
+static void resource_allocation_groupID_set(struct spu *spu, u64 id) /* write priv1 resource_allocation_groupID_RW */
+{
+ out_be64(&spu->priv1->resource_allocation_groupID_RW, id);
+}
+
+static u64 resource_allocation_groupID_get(struct spu *spu) /* read priv1 resource_allocation_groupID_RW */
+{
+ return in_be64(&spu->priv1->resource_allocation_groupID_RW);
+}
+
+static void resource_allocation_enable_set(struct spu *spu, u64 enable) /* write priv1 resource_allocation_enable_RW */
+{
+ out_be64(&spu->priv1->resource_allocation_enable_RW, enable);
+}
+
+static u64 resource_allocation_enable_get(struct spu *spu) /* read priv1 resource_allocation_enable_RW */
+{
+ return in_be64(&spu->priv1->resource_allocation_enable_RW);
+}
+
+const struct spu_priv1_ops spu_priv1_mmio_ops = /* priv1 operations implemented by direct MMIO access to spu->priv1 */
+{
+ .int_mask_and = int_mask_and,
+ .int_mask_or = int_mask_or,
+ .int_mask_set = int_mask_set,
+ .int_mask_get = int_mask_get,
+ .int_stat_clear = int_stat_clear,
+ .int_stat_get = int_stat_get,
+ .cpu_affinity_set = cpu_affinity_set,
+ .mfc_dar_get = mfc_dar_get,
+ .mfc_dsisr_get = mfc_dsisr_get,
+ .mfc_dsisr_set = mfc_dsisr_set,
+ .mfc_sdr_setup = mfc_sdr_setup,
+ .mfc_sr1_set = mfc_sr1_set,
+ .mfc_sr1_get = mfc_sr1_get,
+ .mfc_tclass_id_set = mfc_tclass_id_set,
+ .mfc_tclass_id_get = mfc_tclass_id_get,
+ .tlb_invalidate = tlb_invalidate,
+ .resource_allocation_groupID_set = resource_allocation_groupID_set,
+ .resource_allocation_groupID_get = resource_allocation_groupID_get,
+ .resource_allocation_enable_set = resource_allocation_enable_set,
+ .resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 000000000..7b62bd1cc
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,26 @@
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/kernel/arch/powerpc/platforms/cell/spu_syscalls.c b/kernel/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 000000000..5e6e0bad6
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,178 @@
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+#include <linux/binfmts.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+
+/*
+ * Fetch the registered spufs callback table and pin its owning module.
+ *
+ * Returns the table with a module reference held, or NULL when no table
+ * is registered or try_module_get() on its owner fails.
+ */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *pinned;
+
+	rcu_read_lock();
+	pinned = rcu_dereference(spufs_calls);
+	if (pinned != NULL) {
+		if (!try_module_get(pinned->owner))
+			pinned = NULL;
+	}
+	rcu_read_unlock();
+
+	return pinned;
+}
+
+/*
+ * Drop the module reference taken by spufs_calls_get().
+ *
+ * @calls must be the currently registered table; anything else is a BUG.
+ */
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+	BUG_ON(spufs_calls != calls);
+
+	/*
+	 * The module reference we still hold makes a plain (non-rcu)
+	 * read of spufs_calls sufficient here.
+	 */
+	module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+
+/* Without CONFIG_SPU_FS_MODULE: hand back the registered table as is. */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *calls = spufs_calls;
+
+	return calls;
+}
+
+/* Without CONFIG_SPU_FS_MODULE: no reference was taken, nothing to drop. */
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+}
+
+#endif /* CONFIG_SPU_FS_MODULE */
+
+/*
+ * spu_create system call: forward to the create_thread hook of the
+ * registered spufs callbacks.
+ *
+ * With SPU_CREATE_AFFINITY_SPU set in @flags, @neighbor_fd is resolved and
+ * its file passed to the hook; otherwise the hook gets a NULL neighbor.
+ *
+ * Returns -ENOSYS when no callbacks are registered, -EBADF when
+ * @neighbor_fd is required but does not resolve to a file, else whatever
+ * the hook returns.
+ */
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+		umode_t, mode, int, neighbor_fd)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	struct fd neighbor;
+	long ret;
+
+	if (calls == NULL)
+		return -ENOSYS;
+
+	if (!(flags & SPU_CREATE_AFFINITY_SPU)) {
+		ret = calls->create_thread(name, flags, mode, NULL);
+	} else {
+		neighbor = fdget(neighbor_fd);
+		if (neighbor.file) {
+			ret = calls->create_thread(name, flags, mode,
+						   neighbor.file);
+			fdput(neighbor);
+		} else {
+			ret = -EBADF;
+		}
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+/*
+ * spu_run system call: forward to the spu_run hook of the registered
+ * spufs callbacks.
+ *
+ * Declared through SYSCALL_DEFINE3 so that it is defined the same way as
+ * spu_create in this file instead of open-coding the asmlinkage prototype.
+ *
+ * Returns -ENOSYS when no callbacks are registered, -EBADF when @fd does
+ * not resolve to a file, else whatever the hook returns.
+ */
+SYSCALL_DEFINE3(spu_run, int, fd, __u32 __user *, unpc,
+		__u32 __user *, ustatus)
+{
+	long ret;
+	struct fd arg;
+	struct spufs_calls *calls;
+
+	calls = spufs_calls_get();
+	if (!calls)
+		return -ENOSYS;
+
+	ret = -EBADF;
+	arg = fdget(fd);
+	if (arg.file) {
+		ret = calls->spu_run(arg.file, unpc, ustatus);
+		fdput(arg);
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+#ifdef CONFIG_COREDUMP
+/*
+ * Ask the registered spufs callbacks for the size of the extra core dump
+ * notes.  Returns 0 when no callbacks are registered.
+ */
+int elf_coredump_extra_notes_size(void)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	int size = 0;
+
+	if (calls != NULL) {
+		size = calls->coredump_extra_notes_size();
+		spufs_calls_put(calls);
+	}
+
+	return size;
+}
+
+/*
+ * Let the registered spufs callbacks write their extra core dump notes
+ * through @cprm.  Returns 0 when no callbacks are registered, otherwise
+ * the hook's return value.
+ */
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	int ret = 0;
+
+	if (calls != NULL) {
+		ret = calls->coredump_extra_notes_write(cprm);
+		spufs_calls_put(calls);
+	}
+
+	return ret;
+}
+#endif
+
+/*
+ * Invoke the notify_spus_active hook of the registered spufs callbacks;
+ * does nothing when no callbacks are registered.
+ */
+void notify_spus_active(void)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+
+	if (calls != NULL) {
+		calls->notify_spus_active();
+		spufs_calls_put(calls);
+	}
+}
+
+/*
+ * Publish @calls as the spufs callback table.
+ *
+ * Returns 0 on success, or -EBUSY when a table is already registered.
+ */
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+	if (spufs_calls != NULL)
+		return -EBUSY;
+
+	rcu_assign_pointer(spufs_calls, calls);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+
+/*
+ * Withdraw the spufs callback table.
+ *
+ * It is a BUG if @calls has a different owner than the registered table.
+ * The pointer is cleared and synchronize_rcu() is called before returning.
+ */
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+	BUG_ON(calls->owner != spufs_calls->owner);
+
+	RCU_INIT_POINTER(spufs_calls, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/.gitignore b/kernel/arch/powerpc/platforms/cell/spufs/.gitignore
new file mode 100644
index 000000000..a09ee8d84
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/.gitignore
@@ -0,0 +1,2 @@
+spu_save_dump.h
+spu_restore_dump.h
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/Makefile b/kernel/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 000000000..52a7d2596
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,62 @@
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+spufs-$(CONFIG_COREDUMP) += coredump.o
+
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS := spu-
+SPU_CC := $(SPU_CROSS)gcc
+SPU_AS := $(SPU_CROSS)gcc
+SPU_LD := $(SPU_CROSS)ld
+SPU_OBJCOPY := $(SPU_CROSS)objcopy
+SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__
+SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__
+SPU_LDFLAGS := -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+ cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+ $(call if_changed,spu_cc)
+
+# Assemble SPU files
+ cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+ $(call if_changed,spu_as)
+
+# Link SPU Executables
+ cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+ $(call if_changed,spu_ld)
+
+# Copy into binary format
+ cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+ $(call if_changed,spu_objcopy)
+
+# create a C header (hex-dump array) from the raw binary image
+cmd_hexdump = ( \
+ echo "/*" ; \
+ echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+ echo " * Hex-dump auto generated from $*.c." ; \
+ echo " * Do not edit!" ; \
+ echo " */" ; \
+ echo "static unsigned int $*_code[] " \
+ "__attribute__((__aligned__(128))) = {" ; \
+ hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+ echo "};" ; \
+ ) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+ $(call if_changed,hexdump)
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c b/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 000000000..6e8a9ef85
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,413 @@
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e. generate SPU events, modify channel
+ * counts, etc.
+ */
+
+/*
+ * Record @event in the saved channel state of @ctx.
+ *
+ * The event bits are always OR-ed into channel 0 data.  The channel 0
+ * count is set to 1 only if it was 0, the event was not already present
+ * in channel 0 data, and the event bit is set in channel 1 data.
+ */
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 ch0_count = csa->spu_chnlcnt_RW[0];
+	u64 ch0_before = csa->spu_chnldata_RW[0];
+	u64 ch1_bits = csa->spu_chnldata_RW[1];
+
+	csa->spu_chnldata_RW[0] |= event;
+
+	if (ch0_count != 0)
+		return;
+	if (ch0_before & event)
+		return;
+	if (ch1_bits & event)
+		csa->spu_chnlcnt_RW[0] = 1;
+}
+
+/*
+ * Read one word from the saved pu_mb_R mailbox into @data.
+ *
+ * Returns 4 when a word was available (mb_stat_R & 0x0000ff non-zero),
+ * 0 otherwise.  Runs under csa.register_lock.
+ */
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int nbytes = 0;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->prob.mb_stat_R & 0x0000ff))
+		goto unlock;
+
+	/*
+	 * Read the first available word.
+	 * Implementation note: the depth of pu_mb_R is currently 1.
+	 */
+	*data = csa->prob.pu_mb_R;
+	csa->prob.mb_stat_R &= ~(0x0000ff);
+	csa->spu_chnlcnt_RW[28] = 1;
+	gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+	nbytes = 4;
+unlock:
+	spin_unlock(&csa->register_lock);
+	return nbytes;
+}
+
+/* Return the saved mb_stat_R value of @ctx. */
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->prob.mb_stat_R;
+}
+
+/*
+ * Poll the saved mailbox status of @ctx for the conditions in @events.
+ *
+ * For POLLIN/POLLRDNORM the 0xff0000 field of mb_stat_R is checked (the
+ * field spu_backing_ibox_read() tests); for POLLOUT/POLLWRNORM the
+ * 0x00ff00 field (the one spu_backing_wbox_write() tests).  For each
+ * requested condition that is not yet met, the matching class 2 status
+ * bit is cleared and the matching class 2 mask bit is set in the saved
+ * priv1 area.
+ *
+ * Returns the mask of all requested conditions that are currently met.
+ * Both directions are accumulated, so a caller polling for read and write
+ * at once sees POLLIN and POLLOUT together when both are ready.
+ */
+static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx,
+					       unsigned int events)
+{
+	int ret;
+	u32 stat;
+
+	ret = 0;
+	spin_lock_irq(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= POLLIN | POLLRDNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_INTR;
+		}
+	}
+	if (events & (POLLOUT | POLLWRNORM)) {
+		if (stat & 0x00ff00)
+			/* OR in, do not assign: keep any POLLIN bits set above */
+			ret |= POLLOUT | POLLWRNORM;
+		else {
+			ctx->csa.priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_THRESHOLD_INTR;
+			ctx->csa.priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		}
+	}
+	spin_unlock_irq(&ctx->csa.register_lock);
+	return ret;
+}
+
+/*
+ * Read one word from the saved puint_mb_R mailbox into @data.
+ *
+ * Returns 4 when a word was available (mb_stat_R & 0xff0000 non-zero).
+ * Otherwise sets CLASS2_ENABLE_MAILBOX_INTR in the saved class 2 mask and
+ * returns 0.  Runs under csa.register_lock.
+ */
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int nbytes;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->prob.mb_stat_R & 0xff0000)) {
+		/* make sure we get woken up by the interrupt */
+		csa->priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+		nbytes = 0;
+	} else {
+		/*
+		 * Read the first available word.
+		 * Implementation note: the depth of puint_mb_R is
+		 * currently 1.
+		 */
+		*data = csa->priv2.puint_mb_R;
+		csa->prob.mb_stat_R &= ~(0xff0000);
+		csa->spu_chnlcnt_RW[30] = 1;
+		gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+		nbytes = 4;
+	}
+	spin_unlock(&csa->register_lock);
+	return nbytes;
+}
+
+/*
+ * Queue @data in the saved spu_mailbox_data array.
+ *
+ * Returns 4 when there was room (mb_stat_R & 0x00ff00 non-zero).
+ * Otherwise sets CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR in the saved class 2
+ * mask and returns 0.  Runs under csa.register_lock.
+ */
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int written = 0;
+	int slot;
+	int avail;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->prob.mb_stat_R & 0x00ff00)) {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		csa->priv1.int_mask_class2_RW |=
+			CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		goto unlock;
+	}
+
+	/*
+	 * We have space to write wbox_data.
+	 * Implementation note: the depth of spu_mb_W is currently 4.
+	 */
+	slot = csa->spu_chnlcnt_RW[29];
+	avail = (csa->prob.mb_stat_R & 0x00ff00) >> 8;
+	BUG_ON(avail != (4 - slot));
+
+	csa->spu_mailbox_data[slot] = data;
+	slot++;
+	csa->spu_chnlcnt_RW[29] = slot;
+
+	/* replace the 0x00ff00 field with the remaining room */
+	csa->prob.mb_stat_R &= ~(0x00ff00);
+	csa->prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+	gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+	written = 4;
+unlock:
+	spin_unlock(&csa->register_lock);
+	return written;
+}
+
+/* Return the saved channel 3 data of @ctx (the value signal1 writes set). */
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->spu_chnldata_RW[3];
+}
+
+/*
+ * Deliver @data to saved channel 3 of @ctx.
+ *
+ * If bit 0x1 of the saved spu_cfg_RW is set the data is OR-ed into the
+ * channel, otherwise it replaces the channel contents.  The channel count
+ * is set to 1 and MFC_SIGNAL_1_EVENT is generated.
+ */
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->priv2.spu_cfg_RW & 0x1))
+		csa->spu_chnldata_RW[3] = data;
+	else
+		csa->spu_chnldata_RW[3] |= data;
+	csa->spu_chnlcnt_RW[3] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return the saved channel 4 data of @ctx (the value signal2 writes set). */
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->spu_chnldata_RW[4];
+}
+
+/*
+ * Deliver @data to saved channel 4 of @ctx.
+ *
+ * If bit 0x2 of the saved spu_cfg_RW is set the data is OR-ed into the
+ * channel, otherwise it replaces the channel contents.  The channel count
+ * is set to 1 and MFC_SIGNAL_2_EVENT is generated.
+ */
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->priv2.spu_cfg_RW & 0x2))
+		csa->spu_chnldata_RW[4] = data;
+	else
+		csa->spu_chnldata_RW[4] |= data;
+	csa->spu_chnlcnt_RW[4] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Set (non-zero @val) or clear (zero @val) bit 0 of the saved spu_cfg_RW,
+ * the bit spu_backing_signal1_write() consults.
+ */
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 cfg;
+
+	spin_lock(&csa->register_lock);
+	cfg = csa->priv2.spu_cfg_RW;
+	if (!val)
+		cfg &= ~1;
+	else
+		cfg |= 1;
+	csa->priv2.spu_cfg_RW = cfg;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return 1 if bit 0 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	if (csa->priv2.spu_cfg_RW & 1)
+		return 1;
+	return 0;
+}
+
+/*
+ * Set (non-zero @val) or clear (zero @val) bit 1 of the saved spu_cfg_RW,
+ * the bit spu_backing_signal2_write() consults.
+ */
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 cfg;
+
+	spin_lock(&csa->register_lock);
+	cfg = csa->priv2.spu_cfg_RW;
+	if (!val)
+		cfg &= ~2;
+	else
+		cfg |= 2;
+	csa->priv2.spu_cfg_RW = cfg;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return 1 if bit 1 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	if (csa->priv2.spu_cfg_RW & 2)
+		return 1;
+	return 0;
+}
+
+/* Return the saved spu_npc_RW value of @ctx. */
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->prob.spu_npc_RW;
+}
+
+/* Store @val as the saved spu_npc_RW value of @ctx. */
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->prob.spu_npc_RW = val;
+}
+
+/* Return the saved spu_status_R value of @ctx. */
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->prob.spu_status_R;
+}
+
+/* Return the ls member of the context's saved lscsa area. */
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->lscsa->ls;
+}
+
+/* Store @val as the saved spu_privcntl_RW value of @ctx. */
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.spu_privcntl_RW = val;
+}
+
+/* Return the saved spu_runcntl_RW value of @ctx. */
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->prob.spu_runcntl_RW;
+}
+
+/*
+ * Store @val as the saved spu_runcntl_RW and update the saved status to
+ * match.
+ *
+ * With SPU_RUNCNTL_RUNNABLE set in @val the stop/halt/single-step/
+ * invalid-instruction/invalid-channel status bits are cleared and
+ * SPU_STATUS_RUNNING is set; otherwise SPU_STATUS_RUNNING is cleared.
+ */
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->prob.spu_runcntl_RW = val;
+	if (!(val & SPU_RUNCNTL_RUNNABLE)) {
+		csa->prob.spu_status_R &= ~SPU_STATUS_RUNNING;
+	} else {
+		csa->prob.spu_status_R &= ~(SPU_STATUS_STOPPED_BY_STOP |
+					    SPU_STATUS_STOPPED_BY_HALT |
+					    SPU_STATUS_SINGLE_STEP |
+					    SPU_STATUS_INVALID_INSTR |
+					    SPU_STATUS_INVALID_CH);
+		csa->prob.spu_status_R |= SPU_STATUS_RUNNING;
+	}
+	spin_unlock(&csa->register_lock);
+}
+
+/* Write SPU_RUNCNTL_STOP through spu_backing_runcntl_write(). */
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+ spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP);
+}
+
+/*
+ * Set MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW of @ctx,
+ * under csa.register_lock.
+ */
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW |= MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Clear MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW of
+ * @ctx, under csa.register_lock.
+ */
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Install a tag query (@mask, @mode) in the saved problem state.
+ *
+ * Returns -EAGAIN, changing nothing, when the saved dma_querytype_RW is
+ * already non-zero; otherwise stores mask and mode, masks the saved
+ * dma_tagstatus_R with @mask and returns 0.
+ */
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+				     u32 mode)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+	int ret = -EAGAIN;
+
+	spin_lock(&ctx->csa.register_lock);
+	if (!prob->dma_querytype_RW) {
+		/* FIXME: what are the side-effects of this? */
+		prob->dma_querymask_RW = mask;
+		prob->dma_querytype_RW = mode;
+		/*
+		 * In the current implementation, the SPU context is always
+		 * acquired in runnable state when new bits are added to the
+		 * mask (tagwait), so it's sufficient just to mask
+		 * dma_tagstatus_R with the 'mask' parameter here.
+		 */
+		prob->dma_tagstatus_R &= mask;
+		ret = 0;
+	}
+	spin_unlock(&ctx->csa.register_lock);
+
+	return ret;
+}
+
+/* Return the saved dma_tagstatus_R value of @ctx. */
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_tagstatus_R;
+}
+
+/* Return the saved dma_qstatus_R value of @ctx. */
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_qstatus_R;
+}
+
+/*
+ * Queueing an MFC command on a saved context is not implemented: the
+ * function takes and releases csa.register_lock and always returns
+ * -EAGAIN.  @cmd is not looked at.
+ */
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	struct spu_state *csa = &ctx->csa;
+	int ret = -EAGAIN;
+
+	spin_lock(&csa->register_lock);
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&csa->register_lock);
+
+	return ret;
+}
+
+/* Set MFC_CNTL_RESTART_DMA_COMMAND in the saved mfc_control_RW of @ctx. */
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+/*
+ * spu_context_ops table that operates on the saved context (ctx->csa):
+ * each hook is bound to the spu_backing_* function of the same name.
+ */
+struct spu_context_ops spu_backing_ops = {
+	/* mailboxes */
+	.mbox_read = spu_backing_mbox_read,
+	.mbox_stat_read = spu_backing_mbox_stat_read,
+	.mbox_stat_poll = spu_backing_mbox_stat_poll,
+	.ibox_read = spu_backing_ibox_read,
+	.wbox_write = spu_backing_wbox_write,
+
+	/* signal notification */
+	.signal1_read = spu_backing_signal1_read,
+	.signal1_write = spu_backing_signal1_write,
+	.signal2_read = spu_backing_signal2_read,
+	.signal2_write = spu_backing_signal2_write,
+	.signal1_type_set = spu_backing_signal1_type_set,
+	.signal1_type_get = spu_backing_signal1_type_get,
+	.signal2_type_set = spu_backing_signal2_type_set,
+	.signal2_type_get = spu_backing_signal2_type_get,
+
+	/* npc, status, local store */
+	.npc_read = spu_backing_npc_read,
+	.npc_write = spu_backing_npc_write,
+	.status_read = spu_backing_status_read,
+	.get_ls = spu_backing_get_ls,
+
+	/* run control */
+	.privcntl_write = spu_backing_privcntl_write,
+	.runcntl_read = spu_backing_runcntl_read,
+	.runcntl_write = spu_backing_runcntl_write,
+	.runcntl_stop = spu_backing_runcntl_stop,
+	.master_start = spu_backing_master_start,
+	.master_stop = spu_backing_master_stop,
+
+	/* mfc */
+	.set_mfc_query = spu_backing_set_mfc_query,
+	.read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+	.get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+	.send_mfc_command = spu_backing_send_mfc_command,
+	.restart_dma = spu_backing_restart_dma,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/context.c b/kernel/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 000000000..3b4152fae
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,186 @@
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+#include "sputrace.h"
+
+
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+/*
+ * Allocate and initialise a new SPU context.
+ *
+ * The context starts in SPU_STATE_SAVED with spu_backing_ops, takes a
+ * reference on the current task's mm as its owner and, when @gang is
+ * non-NULL, is added to that gang.  nr_spu_contexts is incremented.
+ *
+ * Returns the new context, or NULL if the allocation or spu_init_csa()
+ * fails.
+ */
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+	struct spu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (ctx == NULL)
+		return NULL;
+
+	/* Binding to physical processor deferred until spu_activate(). */
+	if (spu_init_csa(&ctx->csa)) {
+		kfree(ctx);
+		return NULL;
+	}
+
+	/* locks, reference count and wait queues */
+	spin_lock_init(&ctx->mmio_lock);
+	mutex_init(&ctx->mapping_lock);
+	kref_init(&ctx->kref);
+	mutex_init(&ctx->state_mutex);
+	mutex_init(&ctx->run_mutex);
+	init_waitqueue_head(&ctx->ibox_wq);
+	init_waitqueue_head(&ctx->wbox_wq);
+	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
+	init_waitqueue_head(&ctx->run_wq);
+
+	/* initial state: saved, operated on through the backing ops */
+	ctx->state = SPU_STATE_SAVED;
+	ctx->ops = &spu_backing_ops;
+	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
+	if (gang != NULL)
+		spu_gang_add_ctx(gang, ctx);
+
+	/* scheduler bookkeeping and statistics */
+	__spu_update_sched_info(ctx);
+	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ctx->stats.tstamp = ktime_get_ns();
+
+	atomic_inc(&nr_spu_contexts);
+
+	return ctx;
+}
+
+/*
+ * kref release callback for a context (see put_spu_context()).
+ *
+ * Deactivates the context under state_mutex, tears down its csa, removes
+ * it from its gang if it has one, drops prof_priv_kref if set, and frees
+ * the switch log and the context.  The context must no longer be on a
+ * run queue (BUG otherwise).
+ */
+void destroy_spu_context(struct kref *kref)
+{
+	struct spu_context *ctx =
+		container_of(kref, struct spu_context, kref);
+
+	spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+
+	mutex_lock(&ctx->state_mutex);
+	spu_deactivate(ctx);
+	mutex_unlock(&ctx->state_mutex);
+
+	spu_fini_csa(&ctx->csa);
+
+	if (ctx->gang != NULL)
+		spu_gang_remove_ctx(ctx->gang, ctx);
+	if (ctx->prof_priv_kref != NULL)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+
+	BUG_ON(!list_empty(&ctx->rq));
+	atomic_dec(&nr_spu_contexts);
+
+	kfree(ctx->switch_log);
+	kfree(ctx);
+}
+
+/* Take a kref reference on @ctx and return @ctx. */
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+ kref_get(&ctx->kref);
+ return ctx;
+}
+
+/*
+ * Drop a kref reference on @ctx, with destroy_spu_context() as the
+ * release function.  Returns the result of kref_put().
+ */
+int put_spu_context(struct spu_context *ctx)
+{
+	int released = kref_put(&ctx->kref, destroy_spu_context);
+
+	return released;
+}
+
+/*
+ * Give up the mm reference when the context is about to be destroyed.
+ *
+ * Clears ctx->owner and mmput()s the mm it pointed to, with the context
+ * forced into saved state first.
+ */
+void spu_forget(struct spu_context *ctx)
+{
+	struct mm_struct *owner_mm;
+
+	/*
+	 * This is basically an open-coded spu_acquire_saved, except that
+	 * we don't acquire the state mutex interruptible, and we don't
+	 * want this context to be rescheduled on release.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	owner_mm = ctx->owner;
+	ctx->owner = NULL;
+	mmput(owner_mm);
+
+	spu_release(ctx);
+}
+
+/*
+ * Under mapping_lock, call unmap_mapping_range() on every mapping pointer
+ * recorded in @ctx that is non-NULL (local_store, mfc, cntl, signal1,
+ * signal2, mss, psmap), each from offset 0 over that region's map size.
+ */
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->local_store)
+ unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1);
+ if (ctx->mfc)
+ unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1);
+ if (ctx->cntl)
+ unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1);
+ if (ctx->signal1)
+ unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+ if (ctx->signal2)
+ unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1);
+ if (ctx->mss)
+ unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1);
+ if (ctx->psmap)
+ unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1);
+ mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu context and make sure it is in saved state
+ * @ctx:	spu context to lock
+ *
+ * If the context was not already saved, SPU_SCHED_WAS_ACTIVE is set in
+ * its sched_flags before it is deactivated.
+ *
+ * Returns 0 with the context acquired, or the non-zero result of
+ * spu_acquire() when acquiring failed.
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int ret;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	ret = spu_acquire(ctx);
+	if (ret == 0 && ctx->state != SPU_STATE_SAVED) {
+		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+		spu_deactivate(ctx);
+	}
+
+	return ret;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:	context to unlock
+ *
+ * The context must be in saved state (BUG otherwise).  It is re-activated
+ * only if SPU_SCHED_WAS_ACTIVE was set (the flag is cleared here) and
+ * SPU_SCHED_SPU_RUN is set.
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) {
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_activate(ctx, 0);
+	}
+
+	spu_release(ctx);
+}
+
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/coredump.c b/kernel/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 000000000..be6212ddb
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,211 @@
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+/*
+ * Produce the contents of coredump entry @num for @ctx in @buffer.
+ *
+ * Entries that provide a read hook are delegated to it.  All others have
+ * their get hook's 64-bit value formatted as "0x%.16llx"; the return
+ * value is then the formatted length plus one for the terminating NUL,
+ * or @size when the text did not fit.
+ */
+static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
+				size_t size, loff_t *off)
+{
+	u64 data;
+	int ret;
+
+	if (spufs_coredump_read[num].read != NULL)
+		return spufs_coredump_read[num].read(ctx, buffer, size, off);
+
+	data = spufs_coredump_read[num].get(ctx);
+	ret = snprintf(buffer, size, "0x%.16llx", data);
+	if (ret >= size)
+		return size;
+
+	/* count the trailing NUL */
+	ret++;
+	return ret;
+}
+
+/*
+ * Compute the space needed for all coredump notes of one context.
+ *
+ * For every entry of spufs_coredump_read this adds the ELF note header,
+ * the "SPU/<dfd>/<name>" note name including its NUL rounded up to 4, and
+ * the entry's size rounded up to 4.  @ctx itself is not consulted.
+ */
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+	char fullname[80];
+	char *name;
+	int total = 0;
+	int sz;
+	int i = 0;
+
+	while (spufs_coredump_read[i].name != NULL) {
+		name = spufs_coredump_read[i].name;
+		sz = spufs_coredump_read[i].size;
+
+		sprintf(fullname, "SPU/%d/%s", dfd, name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		total += roundup(sz, 4);
+
+		i++;
+	}
+
+	return total;
+}
+
+/*
+ * iterate_fd() callback: returns @fd + 1 when @file is a spufs context
+ * file whose context lacks SPU_CREATE_NOSCHED, and 0 otherwise.
+ */
+static int match_context(const void *v, struct file *file, unsigned fd)
+{
+	struct spu_context *ctx;
+
+	if (file->f_op == &spufs_context_fops) {
+		ctx = SPUFS_I(file_inode(file))->i_ctx;
+		if (!(ctx->flags & SPU_CREATE_NOSCHED))
+			return fd + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors and sees
+ * if they are a directory in spufs. In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ */
+/*
+ * descriptor table is not shared, so files can't change or go away.
+ */
+/*
+ * Find the next descriptor at or after *@fd that match_context() accepts.
+ *
+ * On a hit *@fd is updated to that descriptor and its context returned;
+ * returns NULL when there is none.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+	int hit = iterate_fd(current->files, *fd, match_context, NULL);
+
+	if (hit == 0)
+		return NULL;
+
+	/* match_context() reports fd + 1 so that 0 can mean "no match" */
+	*fd = hit - 1;
+
+	return SPUFS_I(file_inode(fcheck(*fd)))->i_ctx;
+}
+
+/*
+ * Sum the note sizes of every context found by coredump_next_context().
+ *
+ * The walk stops early, returning what has been accumulated so far, when
+ * a context cannot be acquired or a size comes back negative.
+ */
+int spufs_coredump_extra_notes_size(void)
+{
+	struct spu_context *ctx;
+	int size = 0;
+	int fd = 0;
+	int rc;
+
+	for (;;) {
+		ctx = coredump_next_context(&fd);
+		if (ctx == NULL)
+			break;
+
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			break;
+		rc = spufs_ctx_note_size(ctx, fd);
+		spu_release_saved(ctx);
+		if (rc < 0)
+			break;
+
+		size += rc;
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return size;
+}
+
+/*
+ * Emit one coredump note: entry @i of spufs_coredump_read for @ctx.
+ *
+ * Writes the ELF note header, the "SPU/<dfd>/<name>" note name, alignment
+ * padding, the entry's data (read page by page through
+ * do_coredump_read()), and finally skips ahead so the descriptor occupies
+ * the entry's declared size rounded up to 4.
+ *
+ * Returns 0 on success, -ENOMEM if the bounce page cannot be allocated,
+ * -EIO if emitting to the dump fails, or the negative error returned by
+ * the read.  Success must be reported as 0, not as the byte count of the
+ * last read: the caller treats any non-zero return as failure and would
+ * stop after the first note.
+ */
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+				 struct coredump_params *cprm, int dfd)
+{
+	loff_t pos = 0;
+	int sz, rc, total = 0;
+	const int bufsz = PAGE_SIZE;
+	char *name;
+	char fullname[80], *buf;
+	struct elf_note en;
+
+	buf = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	name = spufs_coredump_read[i].name;
+	sz = spufs_coredump_read[i].size;
+
+	sprintf(fullname, "SPU/%d/%s", dfd, name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		goto Eio;
+
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		goto Eio;
+
+	if (!dump_align(cprm, 4))
+		goto Eio;
+
+	/* keep reading while full pages come back and data remains */
+	do {
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!dump_emit(cprm, buf, rc))
+				goto Eio;
+			total += rc;
+		}
+	} while (rc == bufsz && total < sz);
+
+	if (rc < 0)
+		goto out;
+
+	/* pad the descriptor out to its declared, 4-byte aligned size */
+	if (!dump_skip(cprm,
+		       roundup(cprm->written - total + sz, 4) - cprm->written))
+		goto Eio;
+
+	/* rc still holds the size of the last read; success is 0 */
+	rc = 0;
+	goto out;
+Eio:
+	rc = -EIO;
+out:
+	free_page((unsigned long)buf);
+	return rc;
+}
+
+/*
+ * Write the coredump notes of every context found by
+ * coredump_next_context() through @cprm.
+ *
+ * Each context is held in saved state while all entries of
+ * spufs_coredump_read are written for it.  Returns 0 when all contexts
+ * were processed, or the first non-zero value returned by
+ * spu_acquire_saved() or spufs_arch_write_note().
+ */
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spu_context *ctx;
+	int fd, j, rc;
+
+	/* the fd++ step restarts the search after the fd just handled */
+	for (fd = 0; (ctx = coredump_next_context(&fd)) != NULL; fd++) {
+		rc = spu_acquire_saved(ctx);
+		if (rc)
+			return rc;
+
+		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
+			if (rc)
+				break;
+		}
+
+		spu_release_saved(ctx);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/fault.c b/kernel/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 000000000..d98f845ac
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,191 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * Report an SPE event, either through the context's event_return mask or
+ * as a signal to the current task.
+ *
+ * Contexts flagged SPU_CREATE_EVENTS_ENABLED get @type OR-ed into
+ * ctx->event_return and every sleeper on stop_wq is woken. For all other
+ * contexts the event is translated into a forced signal; event types that
+ * are not listed below produce no signal at all.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	siginfo_t si;
+	unsigned long npc;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	memset(&si, 0, sizeof(si));
+
+	switch (type) {
+	case SPE_EVENT_INVALID_DMA:
+		si.si_signo = SIGBUS;
+		si.si_code = BUS_OBJERR;
+		break;
+	case SPE_EVENT_SPE_DATA_STORAGE:
+		si.si_signo = SIGSEGV;
+		si.si_addr = (void __user *)ea;
+		si.si_code = SEGV_ACCERR;
+		ctx->ops->restart_dma(ctx);
+		break;
+	case SPE_EVENT_DMA_ALIGNMENT:
+		si.si_signo = SIGBUS;
+		/* DAR isn't set for an alignment fault :( */
+		si.si_code = BUS_ADRALN;
+		break;
+	case SPE_EVENT_SPE_ERROR:
+		si.si_signo = SIGILL;
+		/* report the address 4 bytes before the value npc_read() gives */
+		npc = ctx->ops->npc_read(ctx);
+		si.si_addr = (void __user *)npc - 4;
+		si.si_code = ILL_ILLOPC;
+		break;
+	default:
+		/* si_signo is still zero: nothing to deliver */
+		return;
+	}
+
+	force_sig_info(si.si_signo, &si, current);
+}
+
+/*
+ * Deliver the class 0 interrupts recorded in ctx->csa.class_0_pending.
+ *
+ * Returns 0 when no bit covered by CLASS0_INTR_MASK is pending. Otherwise
+ * one event is raised per pending cause, the pending word is cleared and
+ * -EIO is returned.
+ */
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	unsigned long pending;
+
+	pending = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+	if (likely(pending == 0))
+		return 0;
+
+	if (pending & CLASS0_DMA_ALIGNMENT_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_DMA_ALIGNMENT);
+	}
+
+	if (pending & CLASS0_INVALID_DMA_COMMAND_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_INVALID_DMA);
+	}
+
+	if (pending & CLASS0_SPU_ERROR_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_SPE_ERROR);
+	}
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * Returns 0 without doing anything when the saved dsisr carries neither
+ * MFC_DSISR_PTE_NOT_FOUND nor MFC_DSISR_ACCESS_DENIED. Otherwise returns
+ * the result of hash_page(), or of copro_handle_mm_fault() when hashing
+ * failed. In that second case the context lock is dropped with
+ * spu_release() and state_mutex is taken again before returning.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ * in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+ u64 ea, dsisr, access;
+ unsigned long flags;
+ unsigned flt = 0;
+ int ret;
+
+ /*
+ * dar and dsisr get passed from the registers
+ * to the spu_context, to this function, but not
+ * back to the spu if it gets scheduled again.
+ *
+ * if we don't handle the fault for a saved context
+ * in time, we can still expect to get the same fault
+ * immediately after the context restore.
+ */
+ ea = ctx->csa.class_1_dar;
+ dsisr = ctx->csa.class_1_dsisr;
+
+ if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+ return 0;
+
+ spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+ pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+ dsisr, ctx->state);
+
+ /* per-context counter always, per-SPU counter only while runnable */
+ ctx->stats.hash_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE)
+ ctx->spu->stats.hash_flt++;
+
+ /* we must not hold the lock when entering copro_handle_mm_fault */
+ spu_release(ctx);
+
+ /* request write access only when the faulting DMA was a put */
+ access = (_PAGE_PRESENT | _PAGE_USER);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ local_irq_save(flags);
+ ret = hash_page(ea, access, 0x300, dsisr);
+ local_irq_restore(flags);
+
+ /* hashing failed, so try the actual fault handler */
+ if (ret)
+ ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+ /*
+ * This is nasty: we need the state_mutex for all the bookkeeping even
+ * if the syscall was interrupted by a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+
+ /*
+ * Clear dsisr under ctxt lock after handling the fault, so that
+ * time slicing will not preempt the context while the page fault
+ * handler is running. Context switch code removes mappings.
+ */
+ ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+ /*
+ * If we handled the fault successfully and are in runnable
+ * state, restart the DMA.
+ * In case of unhandled error report the problem to user space.
+ */
+ if (!ret) {
+ /* flt stays 0 when hash_page() succeeded, counting as minor */
+ if (flt & VM_FAULT_MAJOR)
+ ctx->stats.maj_flt++;
+ else
+ ctx->stats.min_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ if (flt & VM_FAULT_MAJOR)
+ ctx->spu->stats.maj_flt++;
+ else
+ ctx->spu->stats.min_flt++;
+ }
+
+ if (ctx->spu)
+ ctx->ops->restart_dma(ctx);
+ } else
+ spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+ return ret;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/file.c b/kernel/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 000000000..d966bbe58
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2771 @@
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+#include "sputrace.h"
+
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/*
+ * Simple attribute files: per-open state of a file that exposes one u64
+ * value as text through a get callback and/or a set callback.
+ */
+struct spufs_attr {
+ int (*get)(void *, u64 *); /* NULL makes read() fail with -EACCES */
+ int (*set)(void *, u64); /* NULL makes write() fail with -EACCES */
+ char get_buf[24]; /* enough to store a u64 and "\n\0" */
+ char set_buf[24];
+ void *data; /* first argument handed to get() and set() */
+ const char *fmt; /* format for read operation */
+ struct mutex mutex; /* protects access to these buffers */
+};
+
+/*
+ * Allocate the per-open attribute state and attach it to @file.
+ *
+ * @get, @set and @fmt are stored as given; the callbacks' data pointer is
+ * taken from inode->i_private. Returns -ENOMEM if the allocation fails,
+ * otherwise whatever nonseekable_open() returns.
+ */
+static int spufs_attr_open(struct inode *inode, struct file *file,
+		int (*get)(void *, u64 *), int (*set)(void *, u64),
+		const char *fmt)
+{
+	struct spufs_attr *state = kmalloc(sizeof(*state), GFP_KERNEL);
+
+	if (state == NULL)
+		return -ENOMEM;
+
+	mutex_init(&state->mutex);
+	state->fmt = fmt;
+	state->data = inode->i_private;
+	state->set = set;
+	state->get = get;
+
+	file->private_data = state;
+	return nonseekable_open(inode, file);
+}
+
+/* Free the attribute state that spufs_attr_open() stored in private_data. */
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
+/*
+ * Read the attribute value as text.
+ *
+ * At file position zero the value is fetched through attr->get() and
+ * formatted into get_buf; at any other position the previously formatted
+ * text is reused. Returns -EACCES without a get callback, the error from
+ * the interruptible lock or from get(), or the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr = file->private_data;
+	size_t text_len;
+	ssize_t ret;
+
+	if (!attr->get)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	if (*ppos == 0) {
+		/* first read: fetch and format a fresh value */
+		u64 val;
+
+		ret = attr->get(attr->data, &val);
+		if (ret) {
+			mutex_unlock(&attr->mutex);
+			return ret;
+		}
+		text_len = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				     attr->fmt, (unsigned long long)val);
+	} else {
+		/* continued read: hand out what is already buffered */
+		text_len = strlen(attr->get_buf);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf,
+				      text_len);
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * Parse a number written by user space and pass it to attr->set().
+ *
+ * At most sizeof(set_buf) - 1 bytes of the input are looked at. Returns
+ * -EACCES without a set callback, the error from the interruptible lock,
+ * -EFAULT if the user buffer cannot be read, the non-zero result of
+ * attr->set() if setting the value failed, and @len otherwise.
+ */
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->set)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+
+	/*
+	 * Do not report success when the setter failed, otherwise the
+	 * caller believes the value was stored.
+	 */
+	ret = attr->set(attr->data, val);
+	if (!ret)
+		ret = len; /* claim we got the whole input */
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * Define a file_operations instance named __fops for a simple attribute.
+ *
+ * The generated open routine checks __fmt with __simple_attr_check_format()
+ * and then calls spufs_attr_open() with __get, __set and __fmt; read, write
+ * and release go to the spufs_attr_* helpers.
+ */
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+ __simple_attr_check_format(__fmt, 0ull); \
+ return spufs_attr_open(inode, file, __get, __set, __fmt); \
+} \
+static const struct file_operations __fops = { \
+ .open = __fops ## _open, \
+ .release = spufs_attr_release, \
+ .read = spufs_attr_read, \
+ .write = spufs_attr_write, \
+ .llseek = generic_file_llseek, \
+};
+
+
+/*
+ * Open the local store file: attach the context to @file and, for the
+ * first opener, record the inode's mapping in ctx->local_store. The
+ * opener count is kept under ctx->mapping_lock.
+ */
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->local_store = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Drop one opener of the local store file; when the last one goes away
+ * ctx->local_store is cleared. Runs under ctx->mapping_lock.
+ */
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->local_store = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Copy from the buffer returned by ctx->ops->get_ls() to user space,
+ * treating it as LS_SIZE bytes long. Does no locking of its own.
+ */
+static ssize_t
+__spufs_mem_read(struct spu_context *ctx, char __user *buffer,
+				size_t size, loff_t *pos)
+{
+	return simple_read_from_buffer(buffer, size, pos,
+				       ctx->ops->get_ls(ctx), LS_SIZE);
+}
+
+/*
+ * read() handler for the local store file: performs __spufs_mem_read()
+ * between spu_acquire() and spu_release(). A non-zero spu_acquire() result
+ * is returned unchanged.
+ */
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+				size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t nread = spu_acquire(ctx);
+
+	if (nread == 0) {
+		nread = __spufs_mem_read(ctx, buffer, size, pos);
+		spu_release(ctx);
+	}
+
+	return nread;
+}
+
+/*
+ * write() handler for the local store file.
+ *
+ * Returns -EFBIG when the file position is at or beyond LS_SIZE, the
+ * non-zero result of spu_acquire(), or the result of
+ * simple_write_to_buffer() on the buffer returned by get_ls().
+ */
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+					size_t size, loff_t *ppos)
+{
+	struct spu_context *ctx = file->private_data;
+	char *local_store;
+	ssize_t ret;
+
+	/*
+	 * Nothing can be stored at LS_SIZE itself either; reject it the
+	 * same way the register files do instead of returning 0 bytes
+	 * written.
+	 */
+	if (*ppos >= LS_SIZE)
+		return -EFBIG;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	local_store = ctx->ops->get_ls(ctx);
+	ret = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size);
+	spu_release(ctx);
+
+	return ret;
+}
+
+/*
+ * Fault handler for mappings of the local store file.
+ *
+ * Inserts the pfn backing the faulting page: taken from the saved copy
+ * (ctx->csa.lscsa->ls) while the context is in SPU_STATE_SAVED, otherwise
+ * from the SPU's local_store_phys. Returns VM_FAULT_SIGBUS for offsets at
+ * or beyond LS_SIZE and VM_FAULT_NOPAGE in every other case, including
+ * when spu_acquire() fails.
+ */
+static int
+spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long address = (unsigned long)vmf->virtual_address;
+ unsigned long pfn, offset;
+
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_state *csa = &ctx->csa;
+ int psize;
+
+ /* Check what page size we are using */
+ psize = get_slice_psize(vma->vm_mm, address);
+
+ /* Some sanity checking */
+ BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+ /* Wow, 64K, cool, we need to align the address though */
+ if (csa->use_big_pages) {
+ BUG_ON(vma->vm_start & 0xffff);
+ address &= ~0xfffful;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+ /* byte offset into the local store of the faulting page */
+ offset = vmf->pgoff << PAGE_SHIFT;
+ if (offset >= LS_SIZE)
+ return VM_FAULT_SIGBUS;
+
+ pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+ address, offset);
+
+ if (spu_acquire(ctx))
+ return VM_FAULT_NOPAGE;
+
+ /* the page protection is switched together with the backing memory */
+ if (ctx->state == SPU_STATE_SAVED) {
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+ pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+ } else {
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+ pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+ }
+ /* NOTE(review): the vm_insert_pfn() result is not checked */
+ vm_insert_pfn(vma, address, pfn);
+
+ spu_release(ctx);
+
+ return VM_FAULT_NOPAGE;
+}
+
+/*
+ * vm_ops->access handler for local store mappings.
+ *
+ * Copies up to @len bytes between @buf and the buffer returned by
+ * get_ls(), starting at the distance of @address from the start of the
+ * mapping. The length is clamped so the copy stays inside the mapping.
+ * Returns the number of bytes copied, -EACCES for a write to a mapping
+ * without VM_WRITE, or -EINTR if spu_acquire() fails.
+ *
+ * NOTE(review): the offset ignores vma->vm_pgoff and is not checked
+ * against LS_SIZE, although the fault handler works from vmf->pgoff and
+ * rejects offsets >= LS_SIZE - confirm whether that matters here.
+ */
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	unsigned long span = vma->vm_end - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (spu_acquire(ctx))
+		return -EINTR;
+	/*
+	 * offset is relative to vm_start, so it has to be compared with
+	 * the length of the mapping rather than with the absolute address
+	 * vm_end.
+	 */
+	if ((offset + len) > span)
+		len = span - offset;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
+
+/* VM operations installed by spufs_mem_mmap() on local store mappings. */
+static const struct vm_operations_struct spufs_mem_mmap_vmops = {
+ .fault = spufs_mem_mmap_fault,
+ .access = spufs_mem_mmap_access,
+};
+
+/*
+ * mmap() handler for the local store file.
+ *
+ * Only shared mappings are accepted (-EINVAL otherwise). With
+ * CONFIG_SPU_FS_64K_LS and big pages in use, the mapping start must be
+ * 64K aligned and the page offset a multiple of 16. The VMA is marked
+ * VM_IO | VM_PFNMAP; pages are inserted later by the fault handler.
+ */
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_SPU_FS_64K_LS
+ struct spu_context *ctx = file->private_data;
+ struct spu_state *csa = &ctx->csa;
+
+ /* Sanity check VMA alignment */
+ if (csa->use_big_pages) {
+ pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+ " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+ vma->vm_pgoff);
+ if (vma->vm_start & 0xffff)
+ return -EINVAL;
+ if (vma->vm_pgoff & 0xf)
+ return -EINVAL;
+ }
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+ vma->vm_ops = &spufs_mem_mmap_vmops;
+ return 0;
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+/*
+ * Pick an address range for mapping the local store file: a 64K page
+ * slice when the context uses big pages, otherwise whatever the mm's
+ * regular get_unmapped_area() chooses.
+ */
+static unsigned long spufs_get_unmapped_area(struct file *file,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	struct spu_context *ctx = file->private_data;
+
+	if (ctx->csa.use_big_pages) {
+		/* try to obtain a 64K pages slice */
+		return slice_get_unmapped_area(addr, len, flags,
+					       MMU_PAGE_64K, 1);
+	}
+
+	/* not using big pages: fall back to the normal MM g_u_a */
+	return current->mm->get_unmapped_area(file, addr, len,
+					      pgoff, flags);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
+/*
+ * File operations table wiring up the spufs_mem_* handlers; the
+ * get_unmapped_area hook only exists with CONFIG_SPU_FS_64K_LS.
+ */
+static const struct file_operations spufs_mem_fops = {
+ .open = spufs_mem_open,
+ .release = spufs_mem_release,
+ .read = spufs_mem_read,
+ .write = spufs_mem_write,
+ .llseek = generic_file_llseek,
+ .mmap = spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+ .get_unmapped_area = spufs_get_unmapped_area,
+#endif
+};
+
+/*
+ * Common fault handler for mappings of an SPU problem state area.
+ *
+ * @ps_offs: offset of the area from ctx->spu->problem_phys
+ * @ps_size: size of the area; faults at or beyond it get VM_FAULT_SIGBUS
+ *
+ * While the context is in SPU_STATE_SAVED the handler drops mmap_sem and
+ * sleeps on run_wq until the context is runnable; otherwise it inserts the
+ * pfn of the requested page. Returns VM_FAULT_SIGBUS for a bad offset or a
+ * pending fatal signal, VM_FAULT_NOPAGE in all other cases.
+ */
+static int spufs_ps_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf,
+ unsigned long ps_offs,
+ unsigned long ps_size)
+{
+ struct spu_context *ctx = vma->vm_file->private_data;
+ unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+ int ret = 0;
+
+ spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+ if (offset >= ps_size)
+ return VM_FAULT_SIGBUS;
+
+ if (fatal_signal_pending(current))
+ return VM_FAULT_SIGBUS;
+
+ /*
+ * Because we release the mmap_sem, the context may be destroyed while
+ * we're in spu_wait. Grab an extra reference so it isn't destroyed
+ * in the meantime.
+ */
+ get_spu_context(ctx);
+
+ /*
+ * We have to wait for context to be loaded before we have
+ * pages to hand out to the user, but we don't want to wait
+ * with the mmap_sem held.
+ * It is possible to drop the mmap_sem here, but then we need
+ * to return VM_FAULT_NOPAGE because the mappings may have
+ * changed.
+ */
+ if (spu_acquire(ctx))
+ goto refault;
+
+ if (ctx->state == SPU_STATE_SAVED) {
+ up_read(&current->mm->mmap_sem);
+ spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+ ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+ spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+ down_read(&current->mm->mmap_sem);
+ } else {
+ area = ctx->spu->problem_phys + ps_offs;
+ vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+ (area + offset) >> PAGE_SHIFT);
+ spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+ }
+
+ /*
+ * NOTE(review): presumably spufs_wait() has already dropped the
+ * context lock when it returns an error - confirm against its
+ * definition.
+ */
+ if (!ret)
+ spu_release(ctx);
+
+refault:
+ put_spu_context(ctx);
+ return VM_FAULT_NOPAGE;
+}
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for the control area mapping: problem state offset
+ * 0x4000, SPUFS_CNTL_MAP_SIZE bytes long.
+ */
+static int spufs_cntl_mmap_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE);
+}
+
+/* VM operations installed by spufs_cntl_mmap(). */
+static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
+ .fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ *
+ * Private mappings are refused with -EINVAL. Shared ones are marked
+ * VM_IO | VM_PFNMAP with non-cached protection and get their pages from
+ * the fault handler.
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_SHARED) {
+		vma->vm_flags |= VM_IO | VM_PFNMAP;
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_ops = &spufs_cntl_mmap_vmops;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Attribute getter: store the result of ctx->ops->status_read() in @val.
+ * Returns 0, or the non-zero result of spu_acquire().
+ */
+static int spufs_cntl_get(void *data, u64 *val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (!err) {
+		*val = ctx->ops->status_read(ctx);
+		spu_release(ctx);
+	}
+
+	return err;
+}
+
+/*
+ * Attribute setter: pass @val to ctx->ops->runcntl_write().
+ * Returns 0, or the non-zero result of spu_acquire().
+ */
+static int spufs_cntl_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (!err) {
+		ctx->ops->runcntl_write(ctx, val);
+		spu_release(ctx);
+	}
+
+	return err;
+}
+
+/*
+ * Open the control file.
+ *
+ * Counts the opener under ctx->mapping_lock (the first one records the
+ * inode mapping in ctx->cntl) and then sets the file up as a simple
+ * attribute backed by spufs_cntl_get()/spufs_cntl_set(). Returns the
+ * result of simple_attr_open(); if that fails the opener bookkeeping is
+ * undone, because the release handler is not run for a failed open.
+ *
+ * NOTE(review): simple_attr_open() presumably replaces
+ * file->private_data, while the mmap fault path reads a spu_context from
+ * vm_file->private_data - confirm.
+ */
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	int ret;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	ret = simple_attr_open(inode, file, spufs_cntl_get,
+					spufs_cntl_set, "0x%08lx");
+	if (ret) {
+		/* roll back what spufs_cntl_release() would have undone */
+		mutex_lock(&ctx->mapping_lock);
+		if (!--i->i_openers)
+			ctx->cntl = NULL;
+		mutex_unlock(&ctx->mapping_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Release the control file: tear down the simple attribute state, then
+ * drop one opener and clear ctx->cntl when it was the last one.
+ */
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	simple_attr_release(inode, file);
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->cntl = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * File operations for the control file: simple attribute read/write plus
+ * spufs_cntl_mmap (which is defined as NULL when SPUFS_MMAP_4K is false).
+ */
+static const struct file_operations spufs_cntl_fops = {
+ .open = spufs_cntl_open,
+ .release = spufs_cntl_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+ .mmap = spufs_cntl_mmap,
+};
+
+/* Open handler that attaches the inode's SPU context to @file. */
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return 0;
+}
+
+/*
+ * Copy from the gprs array of the context's saved lscsa to user space.
+ * Does no locking of its own.
+ */
+static ssize_t
+__spufs_regs_read(struct spu_context *ctx, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_lscsa *saved = ctx->csa.lscsa;
+	size_t gprs_len = sizeof(saved->gprs);
+
+	return simple_read_from_buffer(buffer, size, pos,
+				       saved->gprs, gprs_len);
+}
+
+/*
+ * read() handler for the register file: performs __spufs_regs_read()
+ * between spu_acquire_saved() and spu_release_saved(). Returns 0 straight
+ * away when the position is already past the register array.
+ */
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	/*
+	 * Pre-check the file position: if we would only return EOF there
+	 * is no point in causing a deschedule.
+	 */
+	if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+		return 0;
+
+	ret = spu_acquire_saved(ctx);
+	if (!ret) {
+		ret = __spufs_regs_read(ctx, buffer, size, pos);
+		spu_release_saved(ctx);
+	}
+
+	return ret;
+}
+
+/*
+ * write() handler for the register file.
+ *
+ * Returns -EFBIG when the position is at or beyond the size of the gprs
+ * array, the non-zero result of spu_acquire_saved(), or the result of
+ * simple_write_to_buffer() on the saved gprs array.
+ */
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+		 size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *saved = ctx->csa.lscsa;
+	ssize_t written;
+
+	if (*pos >= sizeof(saved->gprs))
+		return -EFBIG;
+
+	written = spu_acquire_saved(ctx);
+	if (written)
+		return written;
+
+	written = simple_write_to_buffer(saved->gprs, sizeof(saved->gprs),
+					 pos, buffer, size);
+	spu_release_saved(ctx);
+
+	return written;
+}
+
+/* File operations table wiring up the spufs_regs_* handlers. */
+static const struct file_operations spufs_regs_fops = {
+ .open = spufs_regs_open,
+ .read = spufs_regs_read,
+ .write = spufs_regs_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Copy from the fpcr field of the context's saved lscsa to user space.
+ * Does no locking of its own.
+ */
+static ssize_t
+__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer,
+			size_t size, loff_t * pos)
+{
+	struct spu_lscsa *saved = ctx->csa.lscsa;
+	size_t fpcr_len = sizeof(saved->fpcr);
+
+	return simple_read_from_buffer(buffer, size, pos,
+				       &saved->fpcr, fpcr_len);
+}
+
+/*
+ * read() handler for the fpcr file: performs __spufs_fpcr_read() between
+ * spu_acquire_saved() and spu_release_saved().
+ */
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (!ret) {
+		ret = __spufs_fpcr_read(ctx, buffer, size, pos);
+		spu_release_saved(ctx);
+	}
+
+	return ret;
+}
+
+/*
+ * write() handler for the fpcr file.
+ *
+ * Returns -EFBIG when the position is at or beyond the size of the fpcr
+ * field, the non-zero result of spu_acquire_saved(), or the result of
+ * simple_write_to_buffer() on the saved fpcr field.
+ */
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+		 size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *saved = ctx->csa.lscsa;
+	ssize_t written;
+
+	if (*pos >= sizeof(saved->fpcr))
+		return -EFBIG;
+
+	written = spu_acquire_saved(ctx);
+	if (written)
+		return written;
+
+	written = simple_write_to_buffer(&saved->fpcr, sizeof(saved->fpcr),
+					 pos, buffer, size);
+	spu_release_saved(ctx);
+
+	return written;
+}
+
+/* File operations for the fpcr file; open is shared with the regs file. */
+static const struct file_operations spufs_fpcr_fops = {
+ .open = spufs_regs_open,
+ .read = spufs_fpcr_read,
+ .write = spufs_fpcr_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Generic open function for all pipe-like files: attach the inode's SPU
+ * context to @file and open it as non-seekable.
+ */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Read as many 32-bit words from the mailbox as possible, until one of
+ * these becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * Returns the number of bytes stored, -EINVAL for buffers shorter than
+ * four bytes, -EFAULT if the buffer is not writable or the very first
+ * store faults, -EAGAIN if the mailbox was empty, or the non-zero result
+ * of spu_acquire().
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 __user *dst = (void __user *)buf;
+	u32 word;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	count = spu_acquire(ctx);
+	if (count)
+		return count;
+
+	count = 0;
+	while ((count + 4) <= len) {
+		if (ctx->ops->mbox_read(ctx, &word) == 0)
+			break;
+
+		/*
+		 * At the end of the mapped area we can fault, but the
+		 * data read successfully so far still has to be returned.
+		 */
+		if (__put_user(word, dst)) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+
+		count += 4;
+		dst++;
+	}
+	spu_release(ctx);
+
+	return count ? count : -EAGAIN;
+}
+
+/* Read-only, non-seekable file operations built on spufs_mbox_read(). */
+static const struct file_operations spufs_mbox_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_mbox_read,
+ .llseek = no_llseek,
+};
+
+/*
+ * Return the low byte of the mailbox status word as a 32-bit value.
+ *
+ * Returns 4 on success, -EINVAL for buffers shorter than four bytes, the
+ * non-zero result of spu_acquire(), or -EFAULT if the copy to user space
+ * fails.
+ */
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mbox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+
+	spu_release(ctx);
+
+	return copy_to_user(buf, &mbox_stat, sizeof mbox_stat) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable file operations built on spufs_mbox_stat_read(). */
+static const struct file_operations spufs_mbox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_mbox_stat_read,
+ .llseek = no_llseek,
+};
+
+/*
+ * low-level ibox access function: forwards to ctx->ops->ibox_read().
+ * Callers in this file treat a zero return as "no data available".
+ */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+ return ctx->ops->ibox_read(ctx, data);
+}
+
+/* fasync handler: (de)register @file on the context's ibox_fasync list. */
+static int spufs_ibox_fasync(int fd, struct file *file, int on)
+{
+ struct spu_context *ctx = file->private_data;
+
+ return fasync_helper(fd, file, on, &ctx->ibox_fasync);
+}
+
+/*
+ * Interrupt-level ibox callback function: wake everyone sleeping on the
+ * context's ibox wait queue and send SIGIO/POLLIN to async listeners.
+ * Does nothing when the SPU has no context attached.
+ */
+void spufs_ibox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx) {
+		wake_up_all(&ctx->ibox_wq);
+		kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN);
+	}
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ *
+ * Returns the number of bytes stored, -EINVAL for buffers shorter than
+ * four bytes, -EFAULT if the buffer is not writable or the first store
+ * faults, -EAGAIN for an empty mailbox with O_NONBLOCK, or the non-zero
+ * result of spu_acquire() / spufs_wait().
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 ibox_data, __user *udata;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE, buf, len))
+ return -EFAULT;
+
+ udata = (void __user *)buf;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /* wait only for the first element */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_ibox_read(ctx, &ibox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+ /*
+ * NOTE(review): the error path skips spu_release(), so
+ * spufs_wait() presumably drops the lock itself on failure -
+ * confirm against its definition.
+ */
+ if (count)
+ goto out;
+ }
+
+ /* if we can't write at all, return -EFAULT */
+ count = __put_user(ibox_data, udata);
+ if (count)
+ goto out_unlock;
+
+ /* one word is stored already; keep going while there is room and data */
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = ctx->ops->ibox_read(ctx, &ibox_data);
+ if (ret == 0)
+ break;
+ /*
+ * at the end of the mapped area, we can fault
+ * but still need to return the data we have
+ * read successfully so far.
+ */
+ ret = __put_user(ibox_data, udata);
+ if (ret)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+/*
+ * poll() handler for the interrupt mailbox: registers on ibox_wq and
+ * returns the mask reported by ctx->ops->mbox_stat_poll() for
+ * POLLIN | POLLRDNORM.
+ */
+static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+ struct spu_context *ctx = file->private_data;
+ unsigned int mask;
+
+ poll_wait(file, &ctx->ibox_wq, wait);
+
+ /*
+ * For now keep this uninterruptible and also ignore the rule
+ * that poll should not sleep. Will be fixed later.
+ */
+ mutex_lock(&ctx->state_mutex);
+ mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+ spu_release(ctx);
+
+ return mask;
+}
+
+/* Non-seekable file operations wiring up the spufs_ibox_* handlers. */
+static const struct file_operations spufs_ibox_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_ibox_read,
+ .poll = spufs_ibox_poll,
+ .fasync = spufs_ibox_fasync,
+ .llseek = no_llseek,
+};
+
+/*
+ * Return bits 16-23 of the mailbox status word as a 32-bit value.
+ *
+ * Returns 4 on success, -EINVAL for buffers shorter than four bytes, the
+ * non-zero result of spu_acquire(), or -EFAULT if the copy to user space
+ * fails.
+ */
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &ibox_stat, sizeof ibox_stat) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable file operations built on spufs_ibox_stat_read(). */
+static const struct file_operations spufs_ibox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_ibox_stat_read,
+ .llseek = no_llseek,
+};
+
+/*
+ * low-level mailbox write: forwards to ctx->ops->wbox_write().
+ * Callers in this file treat a zero return as "nothing was written".
+ */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+ return ctx->ops->wbox_write(ctx, data);
+}
+
+/* fasync handler: (de)register @file on the context's wbox_fasync list. */
+static int spufs_wbox_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+
+	return fasync_helper(fd, file, on, &ctx->wbox_fasync);
+}
+
+/*
+ * Interrupt-level wbox callback function: wake everyone sleeping on the
+ * context's wbox wait queue and send SIGIO/POLLOUT to async listeners.
+ * Does nothing when the SPU has no context attached.
+ */
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (ctx) {
+		wake_up_all(&ctx->wbox_wq);
+		kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT);
+	}
+}
+
+/*
+ * Write as many bytes to the wbox mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is available, but return when we have been able to
+ * write something.
+ *
+ * Returns the number of bytes written, -EINVAL for buffers shorter than
+ * four bytes, -EFAULT if the buffer or its first word cannot be read,
+ * -EAGAIN for a full mailbox with O_NONBLOCK, or the non-zero result of
+ * spu_acquire() / spufs_wait().
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 wbox_data, __user *udata;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ udata = (void __user *)buf;
+ if (!access_ok(VERIFY_READ, buf, len))
+ return -EFAULT;
+
+ /* fetch the first word before taking the context lock */
+ if (__get_user(wbox_data, udata))
+ return -EFAULT;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /*
+ * make sure we can at least write one element, by waiting
+ * in case of !O_NONBLOCK
+ */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_wbox_write(ctx, wbox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+ /*
+ * NOTE(review): the error path skips spu_release(), so
+ * spufs_wait() presumably drops the lock itself on failure -
+ * confirm against its definition.
+ */
+ if (count)
+ goto out;
+ }
+
+
+ /* write as much as possible */
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = __get_user(wbox_data, udata);
+ if (ret)
+ break;
+
+ ret = spu_wbox_write(ctx, wbox_data);
+ if (ret == 0)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+/*
+ * poll() handler for the wbox mailbox: registers on wbox_wq and returns
+ * the mask reported by ctx->ops->mbox_stat_poll() for
+ * POLLOUT | POLLWRNORM.
+ */
+static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+ struct spu_context *ctx = file->private_data;
+ unsigned int mask;
+
+ poll_wait(file, &ctx->wbox_wq, wait);
+
+ /*
+ * For now keep this uninterruptible and also ignore the rule
+ * that poll should not sleep. Will be fixed later.
+ */
+ mutex_lock(&ctx->state_mutex);
+ mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+ spu_release(ctx);
+
+ return mask;
+}
+
+/* Non-seekable file operations wiring up the spufs_wbox_* handlers. */
+static const struct file_operations spufs_wbox_fops = {
+ .open = spufs_pipe_open,
+ .write = spufs_wbox_write,
+ .poll = spufs_wbox_poll,
+ .fasync = spufs_wbox_fasync,
+ .llseek = no_llseek,
+};
+
+/*
+ * Return bits 8-15 of the mailbox status word as a 32-bit value.
+ *
+ * Returns 4 on success, -EINVAL for buffers shorter than four bytes, the
+ * non-zero result of spu_acquire(), or -EFAULT if the copy to user space
+ * fails.
+ */
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_stat;
+	ssize_t err;
+
+	if (len < 4)
+		return -EINVAL;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &wbox_stat, sizeof wbox_stat) ? -EFAULT : 4;
+}
+
+/* Read-only, non-seekable file operations built on spufs_wbox_stat_read(). */
+static const struct file_operations spufs_wbox_stat_fops = {
+ .open = spufs_pipe_open,
+ .read = spufs_wbox_stat_read,
+ .llseek = no_llseek,
+};
+
+/*
+ * Open the signal1 file: attach the context to @file and, for the first
+ * opener, record the inode's mapping in ctx->signal1. The file is opened
+ * as non-seekable.
+ */
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->signal1 = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Drop one opener of the signal1 file; when the last one goes away
+ * ctx->signal1 is cleared. Runs under ctx->mapping_lock.
+ */
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->signal1 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Copy the saved value of channel 3 (ctx->csa.spu_chnldata_RW[3]) to user
+ * space when its channel count is non-zero.
+ *
+ * Returns 4 when a value was copied, 0 when the channel count is zero,
+ * -EINVAL for buffers shorter than four bytes and -EFAULT if the copy
+ * fails. Does no locking of its own.
+ */
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	/* channel count of zero: nothing to hand out */
+	if (!ctx->csa.spu_chnlcnt_RW[3])
+		return 0;
+
+	data = ctx->csa.spu_chnldata_RW[3];
+	if (copy_to_user(buf, &data, 4))
+		return -EFAULT;
+
+	return 4;
+}
+
+/*
+ * read() handler for the signal1 file: performs __spufs_signal1_read()
+ * between spu_acquire_saved() and spu_release_saved().
+ */
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (!ret) {
+		ret = __spufs_signal1_read(ctx, buf, len, pos);
+		spu_release_saved(ctx);
+	}
+
+	return ret;
+}
+
+/*
+ * write() handler for the signal1 file: pass the first 32-bit word of the
+ * user buffer to ctx->ops->signal1_write().
+ *
+ * Returns 4 on success, -EINVAL for buffers shorter than four bytes,
+ * -EFAULT if the word cannot be read, or the non-zero result of
+ * spu_acquire().
+ */
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (!ret) {
+		ctx->ops->signal1_write(ctx, data);
+		spu_release(ctx);
+		ret = 4;
+	}
+
+	return ret;
+}
+
+/*
+ * Fault handler for signal1 mappings. The problem state offset handed to
+ * spufs_ps_fault() depends on SPUFS_SIGNAL_MAP_SIZE: 0x14000 for a 4K
+ * map, 0x10000 for a 64K map; any other size is a build error.
+ */
+static int
+spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+ return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE);
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+ /* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+ * signal 1 and 2 area
+ */
+ return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE);
+#else
+#error unsupported page size
+#endif
+}
+
+static const struct vm_operations_struct spufs_signal1_mmap_vmops = {
+ .fault = spufs_signal1_mmap_fault,
+};
+
+/*
+ * mmap() handler for signal1.  Only shared mappings are accepted
+ * (-EINVAL otherwise); the VMA is marked VM_IO | VM_PFNMAP, made
+ * non-cached and wired to spufs_signal1_mmap_vmops.
+ */
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_signal1_mmap_vmops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	return 0;
+}
+
+/* signal1 file operations including read support. */
+static const struct file_operations spufs_signal1_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_signal1_mmap,
+	.write		= spufs_signal1_write,
+	.read		= spufs_signal1_read,
+	.release	= spufs_signal1_release,
+	.open		= spufs_signal1_open,
+};
+
+/* Same as spufs_signal1_fops but without a read handler. */
+static const struct file_operations spufs_signal1_nosched_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_signal1_mmap,
+	.write		= spufs_signal1_write,
+	.release	= spufs_signal1_release,
+	.open		= spufs_signal1_open,
+};
+
+/*
+ * Open handler for signal2: stores the context in file->private_data and,
+ * for the first opener of this inode, records inode->i_mapping in
+ * ctx->signal2.  Both updates happen under ctx->mapping_lock.
+ * Returns the result of nonseekable_open().
+ */
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->signal2 = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release handler for signal2: drops one opener and clears ctx->signal2
+ * once the count reaches zero, all under ctx->mapping_lock.  Always
+ * returns 0.
+ */
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->signal2 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Copy the saved channel-4 data word to user space.
+ *
+ * Returns -EINVAL when the buffer is shorter than 4 bytes, 0 when the
+ * saved channel count for channel 4 is zero, -EFAULT when the user copy
+ * fails, and 4 on success.  @pos is not used.
+ */
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	u32 value;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (!ctx->csa.spu_chnlcnt_RW[4])
+		return 0;
+
+	value = ctx->csa.spu_chnldata_RW[4];
+	if (copy_to_user(buf, &value, 4))
+		return -EFAULT;
+
+	return 4;
+}
+
+/*
+ * read() handler for signal2: brackets __spufs_signal2_read() with
+ * spu_acquire_saved()/spu_release_saved().  A failed acquire is returned
+ * to the caller unchanged.
+ */
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc)
+		return rc;
+
+	rc = __spufs_signal2_read(ctx, buf, len, pos);
+	spu_release_saved(ctx);
+	return rc;
+}
+
+/*
+ * write() handler for signal2: reads one 32-bit word from user space and
+ * passes it to ctx->ops->signal2_write() with the context acquired.
+ *
+ * Returns -EINVAL for buffers shorter than 4 bytes, -EFAULT when the user
+ * copy fails, the spu_acquire() error if acquiring fails, otherwise 4.
+ */
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 value;
+	ssize_t rc;
+
+	if (len < 4)
+		return -EINVAL;
+	if (copy_from_user(&value, buf, 4))
+		return -EFAULT;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	ctx->ops->signal2_write(ctx, value);
+	spu_release(ctx);
+	return 4;
+}
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for signal2 mappings.  Delegates to spufs_ps_fault() with
+ * an offset that depends on SPUFS_SIGNAL_MAP_SIZE.
+ */
+static int
+spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	const unsigned long ps_offset = 0x1c000;
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/*
+	 * For 64k pages, both signal1 and signal2 can be used to mmap the
+	 * whole signal 1 and 2 area.
+	 */
+	const unsigned long ps_offset = 0x10000;
+#else
+#error unsupported page size
+#endif
+
+	return spufs_ps_fault(vma, vmf, ps_offset, SPUFS_SIGNAL_MAP_SIZE);
+}
+
+/* VM operations for signal2 mappings: only a fault handler is provided. */
+static const struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.fault	= spufs_signal2_mmap_fault,
+};
+
+/*
+ * mmap() handler for signal2.  Only shared mappings are accepted
+ * (-EINVAL otherwise); the VMA is marked VM_IO | VM_PFNMAP, made
+ * non-cached and wired to spufs_signal2_mmap_vmops.
+ */
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+/* Without SPUFS_MMAP_4K the signal2 file has no mmap handler. */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/* signal2 file operations including read support. */
+static const struct file_operations spufs_signal2_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_signal2_mmap,
+	.write		= spufs_signal2_write,
+	.read		= spufs_signal2_read,
+	.release	= spufs_signal2_release,
+	.open		= spufs_signal2_open,
+};
+
+/* Same as spufs_signal2_fops but without a read handler. */
+static const struct file_operations spufs_signal2_nosched_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_signal2_mmap,
+	.write		= spufs_signal2_write,
+	.release	= spufs_signal2_release,
+	.open		= spufs_signal2_open,
+};
+
+/*
+ * Wrapper around DEFINE_SPUFS_SIMPLE_ATTRIBUTE.  It generates a getter
+ * named __<__get> that, depending on __acquire, takes the SPU context
+ * with spu_acquire(), with spu_acquire_saved(), or not at all before
+ * calling __get(ctx).  A failed acquire is returned as the getter's
+ * result.  The set routine is passed through and called directly.
+ */
+#define SPU_ATTR_NOACQUIRE	0
+#define SPU_ATTR_ACQUIRE	1
+#define SPU_ATTR_ACQUIRE_SAVED	2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire)	\
+static int __##__get(void *data, u64 *val)				\
+{									\
+	struct spu_context *ctx = data;					\
+	int ret = 0;							\
+									\
+	switch (__acquire) {						\
+	case SPU_ATTR_ACQUIRE:						\
+		ret = spu_acquire(ctx);					\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release(ctx);					\
+		break;							\
+	case SPU_ATTR_ACQUIRE_SAVED:					\
+		ret = spu_acquire_saved(ctx);				\
+		if (ret)						\
+			return ret;					\
+		*val = __get(ctx);					\
+		spu_release_saved(ctx);					\
+		break;							\
+	default:							\
+		*val = __get(ctx);					\
+		break;							\
+	}								\
+									\
+	return 0;							\
+}									\
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+/*
+ * Attribute setter: forwards @val to ctx->ops->signal1_type_set() with the
+ * context acquired.  Returns the spu_acquire() error, or 0.
+ */
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	ctx->ops->signal1_type_set(ctx, val);
+	spu_release(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns whatever ctx->ops->signal1_type_get() reports. */
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+	u64 type = ctx->ops->signal1_type_get(ctx);
+
+	return type;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+		       spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+/*
+ * Attribute setter: forwards @val to ctx->ops->signal2_type_set() with the
+ * context acquired.  Returns the spu_acquire() error, or 0.
+ */
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	ctx->ops->signal2_type_set(ctx, val);
+	spu_release(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns whatever ctx->ops->signal2_type_get() reports. */
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+	u64 type = ctx->ops->signal2_type_get(ctx);
+
+	return type;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for mss mappings: resolved by spufs_ps_fault() at offset
+ * 0x0000 with size SPUFS_MSS_MAP_SIZE.
+ */
+static int
+spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+/* VM operations for mss mappings: only a fault handler is provided. */
+static const struct vm_operations_struct spufs_mss_mmap_vmops = {
+	.fault	= spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap support for problem state MFC DMA area [0x0000 - 0x0fff].
+ * Only shared mappings are accepted (-EINVAL otherwise); the VMA is
+ * marked VM_IO | VM_PFNMAP and made non-cached.
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+/* Without SPUFS_MMAP_4K the mss file has no mmap handler. */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Open handler for mss: stores the context in file->private_data, then
+ * (under ctx->mapping_lock) records inode->i_mapping in ctx->mss for the
+ * first opener.  Returns the result of nonseekable_open().
+ */
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	file->private_data = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (info->i_openers == 0)
+		ctx->mss = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release handler for mss: drops one opener and clears ctx->mss once the
+ * count reaches zero, under ctx->mapping_lock.  Always returns 0.
+ */
+static int
+spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* mss file operations: open/release bookkeeping plus mmap; no read/write. */
+static const struct file_operations spufs_mss_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_mss_mmap,
+	.release	= spufs_mss_release,
+	.open		= spufs_mss_open,
+};
+
+/*
+ * Fault handler for psmap mappings: resolved by spufs_ps_fault() at offset
+ * 0x0000 with size SPUFS_PS_MAP_SIZE.
+ */
+static int
+spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	const unsigned long ps_offset = 0x0000;
+
+	return spufs_ps_fault(vma, vmf, ps_offset, SPUFS_PS_MAP_SIZE);
+}
+
+/* VM operations for psmap mappings: only a fault handler is provided. */
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault	= spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap support for full problem state area [0x00000 - 0x1ffff].
+ * Only shared mappings are accepted (-EINVAL otherwise); the VMA is
+ * marked VM_IO | VM_PFNMAP and made non-cached.
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	return 0;
+}
+
+/*
+ * Open handler for psmap: under ctx->mapping_lock, stores the context in
+ * file->private_data and records inode->i_mapping in ctx->psmap for the
+ * first opener.  Returns the result of nonseekable_open().
+ */
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = info->i_ctx;
+	if (info->i_openers == 0)
+		ctx->psmap = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release handler for psmap: drops one opener and clears ctx->psmap once
+ * the count reaches zero, under ctx->mapping_lock.  Always returns 0.
+ */
+static int
+spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* psmap file operations: open/release bookkeeping plus mmap; no read/write. */
+static const struct file_operations spufs_psmap_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_psmap_mmap,
+	.release	= spufs_psmap_release,
+	.open		= spufs_psmap_open,
+};
+
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for mfc mappings: resolved by spufs_ps_fault() at offset
+ * 0x3000 with size SPUFS_MFC_MAP_SIZE.
+ */
+static int
+spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+/* VM operations for mfc mappings: only a fault handler is provided. */
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault	= spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap support for the problem state MFC DMA area, which the fault
+ * handler places at offset 0x3000.  Only shared mappings are accepted
+ * (-EINVAL otherwise); the VMA is marked VM_IO | VM_PFNMAP and made
+ * non-cached.
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_flags |= VM_IO | VM_PFNMAP;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+/* Without SPUFS_MMAP_4K the mfc file has no mmap handler. */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * Open handler for mfc.
+ *
+ * Rejects callers whose mm is not the context owner (-EINVAL) and inodes
+ * whose reference count is not exactly one (-EBUSY).  Otherwise stores the
+ * context in file->private_data and, for the first opener, records
+ * inode->i_mapping in ctx->mfc under ctx->mapping_lock.
+ */
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (current->mm != ctx->owner)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers == 0)
+		ctx->mfc = inode->i_mapping;
+	info->i_openers++;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * Release handler for mfc: drops one opener and clears ctx->mfc once the
+ * count reaches zero, under ctx->mapping_lock.  Always returns 0.
+ */
+static int
+spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	info->i_openers--;
+	if (info->i_openers == 0)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Interrupt-level mfc callback function.
+ *
+ * Wakes every waiter on ctx->mfc_wq.  If anyone registered for async
+ * notification, also sends SIGIO with POLLOUT when free queue elements
+ * are reported and POLLIN when a waited-for tag group shows in the tag
+ * status.  Does nothing when the SPU has no context attached.
+ */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+	u32 free_elements, tagstatus;
+	unsigned int events = 0;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->mfc_wq);
+
+	pr_debug("%s %s\n", __func__, spu->name);
+	if (!ctx->mfc_fasync)
+		return;
+
+	/* no need for spu_acquire in interrupt context */
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+
+	if (free_elements & 0xffff)
+		events |= POLLOUT;
+	if (tagstatus & ctx->tagwait)
+		events |= POLLIN;
+
+	kill_fasync(&ctx->mfc_fasync, SIGIO, events);
+}
+
+/*
+ * Wait condition used by the blocking mfc read.
+ *
+ * Stores in *status the tag groups that are both reported by
+ * read_mfc_tagstatus() and present in ctx->tagwait, and removes them from
+ * ctx->tagwait.  Returns 1 if any such group exists; otherwise issues a
+ * set_mfc_query() for the remaining tagwait mask and returns 0.
+ */
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* FIXME we need locking around tagwait */
+	u32 done = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait;
+
+	*status = done;
+	ctx->tagwait &= ~done;
+	if (done)
+		return 1;
+
+	/*
+	 * Enable interrupt waiting for any tag group; this may silently
+	 * fail if interrupts are already enabled.
+	 */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+	return 0;
+}
+
+/*
+ * read() handler for the mfc file: returns a 32-bit tag status word.
+ *
+ * @size must be exactly 4, otherwise -EINVAL is returned.
+ *
+ * With O_NONBLOCK the current tag status is read once; if none of the tag
+ * groups in ctx->tagwait has completed, -EAGAIN is returned.  (Previously
+ * the -EAGAIN was overwritten and the call returned 4 with an unrelated
+ * status word.)  Otherwise the call sleeps on ctx->mfc_wq until
+ * spufs_read_mfc_tagstatus() reports a completed group.
+ *
+ * Returns 4 on success, -EFAULT if the user copy fails, or the error from
+ * spu_acquire()/spufs_wait().
+ */
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 status;
+	int ret;
+
+	if (size != 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait)) {
+			/* nothing we are waiting for has completed yet */
+			spu_release(ctx);
+			return -EAGAIN;
+		}
+		ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+				 spufs_read_mfc_tagstatus(ctx, &status));
+		/* on error, spufs_wait returns without the state mutex held */
+		if (ret)
+			return ret;
+	}
+	spu_release(ctx);
+
+	if (copy_to_user(buffer, &status, 4))
+		return -EFAULT;
+
+	return 4;
+}
+
+/*
+ * Validate a user-supplied MFC DMA command.
+ *
+ * Checks, in order: the opcode is one of the PUT/GET variants; the low
+ * four bits of lsa and ea agree; lsa is aligned for the transfer size
+ * selected by the low four bits of cmd->size (1, 2, 4, 8, or 0 meaning a
+ * 16-byte multiple); the size does not exceed 16 KiB; the tag is below 16;
+ * and the class field is zero.  Returns 0 when valid, -EIO otherwise.
+ */
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	unsigned int align_mask = 0;
+	int bad_size = 0;
+
+	pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+			cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) {
+		pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+				cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	/* pick the lsa bits that must be clear for this transfer size */
+	switch (cmd->size & 0xf) {
+	case 1:
+		align_mask = 0;
+		break;
+	case 2:
+		align_mask = 1;
+		break;
+	case 4:
+		align_mask = 3;
+		break;
+	case 8:
+		align_mask = 7;
+		break;
+	case 0:
+		align_mask = 15;
+		break;
+	default:
+		bad_size = 1;
+		break;
+	}
+
+	if (bad_size || (cmd->lsa & align_mask)) {
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait condition used by the blocking mfc write.
+ *
+ * Tries to queue @cmd and stores the result in *error.  If the first
+ * attempt yields -EAGAIN, a tag-group query is armed and the command is
+ * tried once more.  Returns 0 only when the final attempt still yields
+ * -EAGAIN (caller should keep waiting), 1 otherwise.
+ */
+static int spu_send_mfc_command(struct spu_context *ctx,
+				struct mfc_dma_command cmd,
+				int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error != -EAGAIN)
+		return 1;
+
+	/*
+	 * wait for any tag group to complete so we have space for the
+	 * new command
+	 */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+
+	/* try again, because the queue might be empty again */
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+
+	return *error != -EAGAIN;
+}
+
+/*
+ * write() handler for the mfc file: queues one DMA command.
+ *
+ * The write must be exactly sizeof(struct mfc_dma_command) bytes
+ * (-EINVAL otherwise) and pass spufs_check_valid_dma().  The call waits on
+ * ctx->run_wq until the context is SPU_STATE_RUNNABLE.  It then submits
+ * the command once when O_NONBLOCK is set, or otherwise waits on
+ * ctx->mfc_wq until spu_send_mfc_command() stops reporting -EAGAIN.  On
+ * success the command's tag is added to ctx->tagwait and @size is
+ * returned.
+ */
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int rc;
+
+	if (size != sizeof cmd)
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, buffer, sizeof cmd))
+		return -EFAULT;
+
+	rc = spufs_check_valid_dma(&cmd);
+	if (rc)
+		return rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	/* a failed spufs_wait() is returned without releasing the context */
+	rc = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (rc)
+		return rc;
+
+	if (file->f_flags & O_NONBLOCK) {
+		rc = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+
+		rc = spufs_wait(ctx->mfc_wq,
+				spu_send_mfc_command(ctx, cmd, &status));
+		if (rc)
+			return rc;
+		rc = status;
+	}
+
+	if (!rc) {
+		ctx->tagwait |= 1 << cmd.tag;
+		rc = size;
+	}
+
+	spu_release(ctx);
+	return rc;
+}
+
+/*
+ * poll() handler for the mfc file.
+ *
+ * Registers on ctx->mfc_wq, arms a tag query, and reports
+ * POLLIN | POLLRDNORM when a waited-for tag group shows in the tag status
+ * and POLLOUT | POLLWRNORM when free queue elements are reported.
+ */
+static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	unsigned int events = 0;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	if (tagstatus & ctx->tagwait)
+		events |= POLLIN | POLLRDNORM;
+	if (free_elements & 0xffff)
+		events |= POLLOUT | POLLWRNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+		free_elements, tagstatus, ctx->tagwait);
+
+	return events;
+}
+
+/*
+ * flush() handler for the mfc file.
+ *
+ * Currently this only acquires and releases the context and returns 0 (or
+ * the spu_acquire() error).  The intended implementation waits on
+ * ctx->mfc_wq for outstanding tag groups and is compiled out because it
+ * hangs.
+ */
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire(ctx);
+
+	if (ret)
+		goto out;
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+	if (ret)
+		goto out;
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+
+out:
+	return ret;
+}
+
+/*
+ * fsync() handler for the mfc file: writes back and waits on the inode's
+ * mapping for [start, end], then runs spufs_mfc_flush() under
+ * inode->i_mutex.  Returns the first error encountered, or 0.
+ */
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (err)
+		return err;
+
+	mutex_lock(&inode->i_mutex);
+	err = spufs_mfc_flush(file, NULL);
+	mutex_unlock(&inode->i_mutex);
+
+	return err;
+}
+
+/* fasync() handler: maintains ctx->mfc_fasync through fasync_helper(). */
+static int spufs_mfc_fasync(int fd, struct file *file, int on)
+{
+	struct spu_context *ctx = file->private_data;
+	int rc = fasync_helper(fd, file, on, &ctx->mfc_fasync);
+
+	return rc;
+}
+
+/* File operations for the mfc file. */
+static const struct file_operations spufs_mfc_fops = {
+	.llseek		= no_llseek,
+	.mmap		= spufs_mfc_mmap,
+	.fasync		= spufs_mfc_fasync,
+	.fsync		= spufs_mfc_fsync,
+	.flush		= spufs_mfc_flush,
+	.poll		= spufs_mfc_poll,
+	.write		= spufs_mfc_write,
+	.read		= spufs_mfc_read,
+	.release	= spufs_mfc_release,
+	.open		= spufs_mfc_open,
+};
+
+/*
+ * Attribute setter: passes @val to ctx->ops->npc_write() with the context
+ * acquired.  Returns the spu_acquire() error, or 0.
+ */
+static int spufs_npc_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	ctx->ops->npc_write(ctx, val);
+	spu_release(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns the value reported by ctx->ops->npc_read(). */
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+	u64 npc = ctx->ops->npc_read(ctx);
+
+	return npc;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+/*
+ * Attribute setter: stores the low 32 bits of @val in slot 0 of the saved
+ * decrementer (lscsa->decr) while the context is held in saved state.
+ * Returns the spu_acquire_saved() error, or 0.
+ */
+static int spufs_decr_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc)
+		return rc;
+
+	lscsa->decr.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns slot 0 of the saved decrementer. */
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute setter: sets (non-zero @val) or clears (zero @val) the
+ * MFC_CNTL_DECREMENTER_RUNNING bit in the saved mfc_control_RW register.
+ * Returns the spu_acquire_saved() error, or 0.
+ */
+static int spufs_decr_status_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc)
+		return rc;
+
+	if (!val)
+		ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	else
+		ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+/*
+ * Attribute getter: SPU_DECR_STATUS_RUNNING when the saved mfc_control_RW
+ * has MFC_CNTL_DECREMENTER_RUNNING set, otherwise 0.
+ */
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+	if (!(ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING))
+		return 0;
+
+	return SPU_DECR_STATUS_RUNNING;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+		       spufs_decr_status_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute setter: stores the low 32 bits of @val in slot 0 of the saved
+ * event mask while the context is held in saved state.  Returns the
+ * spu_acquire_saved() error, or 0.
+ */
+static int spufs_event_mask_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc)
+		return rc;
+
+	lscsa->event_mask.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns slot 0 of the saved event mask. */
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+		       spufs_event_mask_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Attribute getter: returns the saved channel-0 data when the saved
+ * channel-0 count is non-zero, otherwise 0.
+ */
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 pending = csa->spu_chnlcnt_RW[0];
+
+	if (!pending)
+		return 0;
+
+	return csa->spu_chnldata_RW[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+		       NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Attribute setter: stores the low 32 bits of @val in slot 0 of the saved
+ * srr0 while the context is held in saved state.  Returns the
+ * spu_acquire_saved() error, or 0.
+ */
+static int spufs_srr0_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc)
+		return rc;
+
+	lscsa->srr0.slot[0] = (u32) val;
+	spu_release_saved(ctx);
+	return 0;
+}
+
+/* Attribute getter: returns slot 0 of the saved srr0. */
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Attribute getter: the number of the SPU the context is on when it is
+ * SPU_STATE_RUNNABLE, otherwise (unsigned int)-1.
+ */
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return (unsigned int)-1;
+
+	return ctx->spu->number;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE)
+
+/* Attribute getter: returns ctx->object_id without taking any lock. */
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+	/* FIXME: Should there really be no locking here? */
+	u64 id = ctx->object_id;
+
+	return id;
+}
+
+/* Attribute setter: stores @id in ctx->object_id; always returns 0. */
+static int spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *ctx = data;
+
+	ctx->object_id = id;
+	return 0;
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		       spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+/* Attribute getter: returns the saved spu_lslr_RW register. */
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+	u64 lslr = ctx->csa.priv2.spu_lslr_RW;
+
+	return lslr;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Open handler shared by the *_info files: stores the inode's SPU context
+ * in file->private_data.  Always returns 0.
+ */
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+	return 0;
+}
+
+/*
+ * seq_file show routine: prints "sched" unless the context was created
+ * with SPU_CREATE_NOSCHED, and "step" unless it was created with
+ * SPU_CREATE_ISOLATE.
+ */
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	const int nosched = ctx->flags & SPU_CREATE_NOSCHED;
+	const int isolate = ctx->flags & SPU_CREATE_ISOLATE;
+
+	if (!nosched)
+		seq_puts(s, "sched\n");
+	if (!isolate)
+		seq_puts(s, "step\n");
+
+	return 0;
+}
+
+/* Open handler: single_open() with spufs_caps_show and the inode's context. */
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_caps_show, ctx);
+}
+
+/* seq_file based operations for the capabilities file. */
+static const struct file_operations spufs_caps_fops = {
+	.release	= single_release,
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.open		= spufs_caps_open,
+};
+
+/*
+ * Copy the saved pu_mb_R word to user space through
+ * simple_read_from_buffer().  Returns 0 (EOF) when the low byte of the
+ * saved mb_stat_R is zero, i.e. there is no entry in the mbox.
+ */
+static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 entry;
+
+	if ((ctx->csa.prob.mb_stat_R & 0x0000ff) == 0)
+		return 0;
+
+	entry = ctx->csa.prob.pu_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &entry, sizeof entry);
+}
+
+/*
+ * read() handler for mbox_info.
+ *
+ * The saved mailbox status and data are snapshotted under
+ * csa.register_lock, and the copy to user space is done only after the
+ * spinlock and the context have been released: the user copy can fault
+ * and sleep, which is not allowed while holding a spinlock.
+ *
+ * Returns -EFAULT if @buf fails access_ok(), the spu_acquire_saved()
+ * error, 0 (EOF) when there is no entry in the mbox, or the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 stat, data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+	data = ctx->csa.prob.pu_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the mbox */
+	if (!(stat & 0x0000ff))
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
+}
+
+/* Read-only operations for mbox_info; seeking uses generic_file_llseek. */
+static const struct file_operations spufs_mbox_info_fops = {
+	.llseek	= generic_file_llseek,
+	.read	= spufs_mbox_info_read,
+	.open	= spufs_info_open,
+};
+
+/*
+ * Copy the saved puint_mb_R word to user space through
+ * simple_read_from_buffer().  Returns 0 (EOF) when bits 16-23 of the
+ * saved mb_stat_R are zero, i.e. there is no entry in the ibox.
+ */
+static ssize_t __spufs_ibox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 entry;
+
+	if ((ctx->csa.prob.mb_stat_R & 0xff0000) == 0)
+		return 0;
+
+	entry = ctx->csa.priv2.puint_mb_R;
+
+	return simple_read_from_buffer(buf, len, pos, &entry, sizeof entry);
+}
+
+/*
+ * read() handler for ibox_info.
+ *
+ * The saved mailbox status and interrupt-mailbox data are snapshotted
+ * under csa.register_lock, and the copy to user space is done only after
+ * the spinlock and the context have been released: the user copy can
+ * fault and sleep, which is not allowed while holding a spinlock.
+ *
+ * Returns -EFAULT if @buf fails access_ok(), the spu_acquire_saved()
+ * error, 0 (EOF) when there is no entry in the ibox, or the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 stat, data;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	stat = ctx->csa.prob.mb_stat_R;
+	data = ctx->csa.priv2.puint_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the ibox */
+	if (!(stat & 0xff0000))
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &data, sizeof(data));
+}
+
+/* Read-only operations for ibox_info; seeking uses generic_file_llseek. */
+static const struct file_operations spufs_ibox_info_fops = {
+	.llseek	= generic_file_llseek,
+	.read	= spufs_ibox_info_read,
+	.open	= spufs_info_open,
+};
+
+/*
+ * Copy the occupied entries of the saved SPU mailbox into @data (room for
+ * four words) and return how many were copied.  The count is 4 minus the
+ * value in bits 8-15 of the saved mb_stat_R.
+ */
+static int spufs_wbox_info_snapshot(struct spu_context *ctx, u32 *data)
+{
+	u32 wbox_stat = ctx->csa.prob.mb_stat_R;
+	int i, cnt;
+
+	cnt = 4 - ((wbox_stat & 0x00ff00) >> 8);
+	/*
+	 * A status field above 4 would make cnt negative and the byte
+	 * count handed to simple_read_from_buffer() wrap to a huge value.
+	 */
+	if (cnt < 0)
+		cnt = 0;
+	for (i = 0; i < cnt; i++)
+		data[i] = ctx->csa.spu_mailbox_data[i];
+
+	return cnt;
+}
+
+/*
+ * Copy the occupied saved wbox entries to user space.  The caller is
+ * responsible for any locking; the user copy may fault, so this must not
+ * be called with a spinlock held.
+ */
+static ssize_t __spufs_wbox_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	u32 data[4];
+	int cnt = spufs_wbox_info_snapshot(ctx, data);
+
+	return simple_read_from_buffer(buf, len, pos, data,
+				cnt * sizeof(u32));
+}
+
+/*
+ * read() handler for wbox_info.
+ *
+ * The mailbox contents are snapshotted under csa.register_lock and copied
+ * to user space only after the spinlock and the context are released,
+ * because the user copy can fault and sleep.
+ *
+ * Returns -EFAULT if @buf fails access_ok(), the spu_acquire_saved()
+ * error, or the result of simple_read_from_buffer().
+ */
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 data[4];
+	int ret, cnt;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	cnt = spufs_wbox_info_snapshot(ctx, data);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, data,
+				cnt * sizeof(u32));
+}
+
+/* Read-only operations for wbox_info; seeking uses generic_file_llseek. */
+static const struct file_operations spufs_wbox_info_fops = {
+	.llseek	= generic_file_llseek,
+	.read	= spufs_wbox_info_read,
+	.open	= spufs_info_open,
+};
+
+/*
+ * Fill @info from the saved context state: tag status query type, tag
+ * mask, three saved channel data words (24, 25, 27) and the 16 saved SPU
+ * command queue entries.
+ */
+static void spufs_dma_info_fill(struct spu_context *ctx,
+				struct spu_dma_info *info)
+{
+	struct mfc_cq_sr *qp, *spuqp;
+	int i;
+
+	info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW;
+	info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0];
+	info->dma_info_status = ctx->csa.spu_chnldata_RW[24];
+	info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25];
+	info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27];
+	for (i = 0; i < 16; i++) {
+		qp = &info->dma_info_command_data[i];
+		spuqp = &ctx->csa.priv2.spuq[i];
+
+		qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Copy a struct spu_dma_info built from the saved state to user space.
+ * The caller is responsible for any locking; the user copy may fault, so
+ * this must not be called with a spinlock held.
+ */
+static ssize_t __spufs_dma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_dma_info info;
+
+	spufs_dma_info_fill(ctx, &info);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+/*
+ * read() handler for dma_info.
+ *
+ * The info structure is filled under csa.register_lock and copied to user
+ * space only after the spinlock and the context are released, because the
+ * user copy can fault and sleep.
+ *
+ * Returns -EFAULT if @buf fails access_ok(), the spu_acquire_saved()
+ * error, or the result of simple_read_from_buffer().
+ */
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_dma_info info;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	spufs_dma_info_fill(ctx, &info);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+/* Read-only operations for dma_info; llseek is no_llseek. */
+static const struct file_operations spufs_dma_info_fops = {
+	.llseek	= no_llseek,
+	.read	= spufs_dma_info_read,
+	.open	= spufs_info_open,
+};
+
+/*
+ * Fill @info from the saved context state: proxy DMA query type, query
+ * mask, tag status and the 8 saved proxy command queue entries.
+ */
+static void spufs_proxydma_info_fill(struct spu_context *ctx,
+				     struct spu_proxydma_info *info)
+{
+	struct mfc_cq_sr *qp, *puqp;
+	int i;
+
+	info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW;
+	info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW;
+	info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R;
+	for (i = 0; i < 8; i++) {
+		qp = &info->proxydma_info_command_data[i];
+		puqp = &ctx->csa.priv2.puq[i];
+
+		qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW;
+		qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW;
+		qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW;
+		qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Copy a struct spu_proxydma_info built from the saved state to user
+ * space.  Returns -EINVAL if @len is smaller than the structure and
+ * -EFAULT if @buf fails access_ok().  The caller is responsible for any
+ * locking; the user copy may fault, so this must not be called with a
+ * spinlock held.
+ */
+static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx,
+			char __user *buf, size_t len, loff_t *pos)
+{
+	struct spu_proxydma_info info;
+
+	if (len < sizeof info)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	spufs_proxydma_info_fill(ctx, &info);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+/*
+ * read() handler for proxydma_info.
+ *
+ * Argument checks are done up front.  The info structure is filled under
+ * csa.register_lock and copied to user space only after the spinlock and
+ * the context are released, because the user copy can fault and sleep.
+ *
+ * Returns -EINVAL if @len is smaller than struct spu_proxydma_info,
+ * -EFAULT if @buf fails access_ok(), the spu_acquire_saved() error, or
+ * the result of simple_read_from_buffer().
+ */
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_proxydma_info info;
+	int ret;
+
+	if (len < sizeof info)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, len))
+		return -EFAULT;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	spufs_proxydma_info_fill(ctx, &info);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &info,
+				sizeof info);
+}
+
+/* Read-only operations for proxydma_info; llseek is no_llseek. */
+static const struct file_operations spufs_proxydma_info_fops = {
+	.llseek	= no_llseek,
+	.read	= spufs_proxydma_info_read,
+	.open	= spufs_info_open,
+};
+
+/* seq_file show routine: prints ctx->tid as a decimal followed by newline. */
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+	const struct spu_context *ctx = s->private;
+
+	seq_printf(s, "%d\n", ctx->tid);
+
+	return 0;
+}
+
+/* Open handler: single_open() with spufs_show_tid and the inode's context. */
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_tid, ctx);
+}
+
+/* seq_file based operations for the tid file. */
+static const struct file_operations spufs_tid_fops = {
+	.release	= single_release,
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.open		= spufs_tid_open,
+};
+
+/* Printable names, indexed by ctx->stats.util_state in spufs_show_stat(). */
+static const char *ctx_state_names[] = {
+	"user",
+	"system",
+	"iowait",
+	"loaded"
+};
+
+/*
+ * Return the time accounted to @state for this context, in milliseconds.
+ *
+ * Utilization statistics are normally updated by the controlling thread
+ * on state transitions.  For a lazily loaded context that does not happen,
+ * so when the context is on an SPU and currently in @state the time since
+ * the last recorded timestamp is added on top of the stored total.
+ */
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	unsigned long long ns = ctx->stats.times[state];
+	const int in_state_now = ctx->spu && ctx->stats.util_state == state;
+
+	if (in_state_now)
+		ns += ktime_get_ns() - ctx->stats.tstamp;
+
+	return ns / NSEC_PER_MSEC;
+}
+
+/*
+ * Return the context's SLB fault count.  While the context is
+ * SPU_STATE_RUNNABLE the faults the SPU has taken since slb_flt_base are
+ * added to the stored total.
+ */
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.slb_flt;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	total += (ctx->spu->stats.slb_flt - ctx->stats.slb_flt_base);
+
+	return total;
+}
+
+/*
+ * Return the context's class 2 interrupt count.  While the context is
+ * SPU_STATE_RUNNABLE the interrupts the SPU has taken since
+ * class2_intr_base are added to the stored total.
+ */
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.class2_intr;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	total += (ctx->spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+	return total;
+}
+
+
+/*
+ * seq_file show routine for the stat file.
+ *
+ * With the context acquired, prints one line: the current utilization
+ * state name, the four per-state times in milliseconds, then the counters
+ * for voluntary and involuntary context switches, SLB faults, hash, minor
+ * and major faults, class 2 interrupts and libassist.  Returns the
+ * spu_acquire() error, or 0.
+ */
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		   ctx_state_names[ctx->stats.util_state],
+		   spufs_acct_time(ctx, SPU_UTIL_USER),
+		   spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+		   spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+		   spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+		   ctx->stats.vol_ctx_switch,
+		   ctx->stats.invol_ctx_switch,
+		   spufs_slb_flts(ctx),
+		   ctx->stats.hash_flt,
+		   ctx->stats.min_flt,
+		   ctx->stats.maj_flt,
+		   spufs_class2_intrs(ctx),
+		   ctx->stats.libassist);
+
+	spu_release(ctx);
+
+	return 0;
+}
+
+/* Open handler: single_open() with spufs_show_stat and the inode's context. */
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_stat, ctx);
+}
+
+/* seq_file based operations for the stat file. */
+static const struct file_operations spufs_stat_fops = {
+	.release	= single_release,
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.open		= spufs_stat_open,
+};
+
+/*
+ * Number of entries currently held in the context's switch log:
+ * (head - tail) modulo SWITCH_LOG_BUFSIZE.
+ */
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+	return (ctx->switch_log->head - ctx->switch_log->tail) %
+		SWITCH_LOG_BUFSIZE;
+}
+
+/* Free space in the switch log: SWITCH_LOG_BUFSIZE minus the used count. */
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+	int used = spufs_switch_log_used(ctx);
+
+	return SWITCH_LOG_BUFSIZE - used;
+}
+
+/*
+ * Open handler for the switch log.
+ *
+ * With the context acquired, allocates the log (header plus
+ * SWITCH_LOG_BUFSIZE entries), resets head and tail and initialises its
+ * wait queue.  Returns -EBUSY if the context already has a log, -ENOMEM
+ * if the allocation fails, the spu_acquire() error, or 0.
+ */
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	if (ctx->switch_log) {
+		rc = -EBUSY;
+	} else {
+		ctx->switch_log = kmalloc(sizeof(struct switch_log) +
+			SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry),
+			GFP_KERNEL);
+
+		if (ctx->switch_log) {
+			ctx->switch_log->head = ctx->switch_log->tail = 0;
+			init_waitqueue_head(&ctx->switch_log->wait);
+			rc = 0;
+		} else {
+			rc = -ENOMEM;
+		}
+	}
+
+	spu_release(ctx);
+	return rc;
+}
+
+/*
+ * Release handler for the switch log: with the context acquired, frees
+ * the log and clears ctx->switch_log.  Returns the spu_acquire() error,
+ * or 0.
+ *
+ * NOTE(review): when spu_acquire() fails the log is neither freed nor
+ * cleared, so later opens would see -EBUSY -- confirm whether the acquire
+ * can fail here.
+ */
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc = spu_acquire(ctx);
+
+	if (rc)
+		return rc;
+
+	kfree(ctx->switch_log);
+	ctx->switch_log = NULL;
+
+	spu_release(ctx);
+	return 0;
+}
+
+/*
+ * Format the entry at the tail of the switch log into tbuf (at most n
+ * bytes) as one text line. Returns the value of snprintf().
+ */
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+	struct switch_log *log = ctx->switch_log;
+	struct switch_log_entry *entry;
+
+	entry = log->log + log->tail % SWITCH_LOG_BUFSIZE;
+
+	return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
+			(unsigned int) entry->tstamp.tv_sec,
+			(unsigned int) entry->tstamp.tv_nsec,
+			entry->spu_id,
+			(unsigned int) entry->type,
+			(unsigned int) entry->val,
+			(unsigned long long) entry->timebase);
+}
+
+/*
+ * Read handler for "switch_log": copy as many whole formatted log records
+ * as fit into the user buffer.
+ *
+ * Blocks until at least one record is available unless O_NONBLOCK is set,
+ * in which case -EAGAIN is returned. A record is only consumed (tail
+ * advanced) after it has been copied to user space successfully, and only
+ * if it fits in the space that is still left in the buffer.
+ *
+ * Returns the number of bytes copied; if nothing was copied, returns 0 or
+ * a negative errno (-EINVAL for a NULL buffer, -EFAULT for a faulting
+ * copy, or the error from spu_acquire()/spufs_wait()).
+ */
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+				     size_t len, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int error = 0, cnt = 0;
+
+	if (!buf)
+		return -EINVAL;
+
+	error = spu_acquire(ctx);
+	if (error)
+		return error;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		if (spufs_switch_log_used(ctx) == 0) {
+			if (cnt > 0) {
+				/* If there's data ready to go, we can
+				 * just return straight away */
+				break;
+
+			} else if (file->f_flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+
+			} else {
+				/* spufs_wait will drop the mutex and
+				 * re-acquire, but since we're in read(), the
+				 * file cannot be _released (and so
+				 * ctx->switch_log is stable).
+				 */
+				error = spufs_wait(ctx->switch_log->wait,
+						spufs_switch_log_used(ctx) > 0);
+
+				/* On error, spufs_wait returns without the
+				 * state mutex held */
+				if (error)
+					return error;
+
+				/* We may have had entries read from underneath
+				 * us while we dropped the mutex in spufs_wait,
+				 * so re-check */
+				if (spufs_switch_log_used(ctx) == 0)
+					continue;
+			}
+		}
+
+		width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+
+		/* Compare against the space that is left, not the total
+		 * buffer size, so we never write past buf + len. If the
+		 * record does not fit, return the partial buffer (so far). */
+		if (width >= len - cnt)
+			break;
+
+		/* copy_to_user returns the number of bytes NOT copied;
+		 * translate that into a proper errno. */
+		if (copy_to_user(buf + cnt, tbuf, width)) {
+			error = -EFAULT;
+			break;
+		}
+
+		/* Only consume the record once the user has it. */
+		ctx->switch_log->tail =
+			(ctx->switch_log->tail + 1) % SWITCH_LOG_BUFSIZE;
+		cnt += width;
+	}
+
+	spu_release(ctx);
+
+	return cnt == 0 ? error : cnt;
+}
+
+/*
+ * Poll handler for "switch_log": report POLLIN when the log holds at
+ * least one entry.
+ *
+ * NOTE(review): when spu_acquire() fails its negative error code is
+ * returned through the unsigned poll mask - confirm that is intended.
+ */
+static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = SPUFS_I(file_inode(file))->i_ctx;
+	unsigned int mask;
+	int rc;
+
+	poll_wait(file, &ctx->switch_log->wait, wait);
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	mask = (spufs_switch_log_used(ctx) > 0) ? POLLIN : 0;
+
+	spu_release(ctx);
+
+	return mask;
+}
+
+/* File operations for the per-context "switch_log" file (not seekable). */
+static const struct file_operations spufs_switch_log_fops = {
+ .open = spufs_switch_log_open,
+ .read = spufs_switch_log_read,
+ .poll = spufs_switch_log_poll,
+ .release = spufs_switch_log_release,
+ .llseek = no_llseek,
+};
+
+/**
+ * Record a context switch event for a switch log reader, if there is one.
+ *
+ * Nothing is logged when the context has no switch log allocated. When the
+ * ring is (nearly) full the event is dropped, but waiters are still woken.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val)
+{
+	struct switch_log *log = ctx->switch_log;
+
+	if (!log)
+		return;
+
+	if (spufs_switch_log_avail(ctx) > 1) {
+		struct switch_log_entry *entry = log->log + log->head;
+
+		ktime_get_ts(&entry->tstamp);
+		entry->timebase = get_tb();
+		/* -1 marks an event with no physical SPU attached */
+		entry->spu_id = spu ? spu->number : -1;
+		entry->type = type;
+		entry->val = val;
+
+		log->head = (log->head + 1) % SWITCH_LOG_BUFSIZE;
+	}
+
+	wake_up(&log->wait);
+}
+
+/*
+ * seq_file show callback for the debug ".ctx" file: print one line with
+ * the context's state, flags, priority, time slice, SPU number, run-queue
+ * membership and a few saved/hardware register values.
+ *
+ * Takes ctx->state_mutex (uninterruptibly) for the duration. Returns 0.
+ */
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+ struct spu_context *ctx = s->private;
+ u64 mfc_control_RW;
+
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->spu) {
+ /* context is bound to an SPU: read the register from hardware */
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ spin_lock_irq(&spu->register_lock);
+ mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+ spin_unlock_irq(&spu->register_lock);
+ } else {
+ /* no SPU bound: use the copy in the context save area */
+ struct spu_state *csa = &ctx->csa;
+
+ mfc_control_RW = csa->priv2.mfc_control_RW;
+ }
+
+ seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+ " %c %llx %llx %llx %llx %x %x\n",
+ ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+ ctx->flags,
+ ctx->sched_flags,
+ ctx->prio,
+ ctx->time_slice,
+ ctx->spu ? ctx->spu->number : -1,
+ !list_empty(&ctx->rq) ? 'q' : ' ',
+ ctx->csa.class_0_pending,
+ ctx->csa.class_0_dar,
+ ctx->csa.class_1_dsisr,
+ mfc_control_RW,
+ ctx->ops->runcntl_read(ctx),
+ ctx->ops->status_read(ctx));
+
+ mutex_unlock(&ctx->state_mutex);
+
+ return 0;
+}
+
+/* Open handler for the ".ctx" file: hand the inode's context to seq_file. */
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_ctx, ctx);
+}
+
+/* File operations for the debug ".ctx" file (single-record seq_file). */
+static const struct file_operations spufs_ctx_fops = {
+ .open = spufs_ctx_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Files populated into a context directory created without
+ * SPU_CREATE_NOSCHED. Each entry is { name, file operations, mode,
+ * optional size }; the list ends with an empty entry.
+ */
+const struct spufs_tree_descr spufs_dir_contents[] = {
+ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, LS_SIZE, },
+ { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), },
+ { "mbox", &spufs_mbox_fops, 0444, },
+ { "ibox", &spufs_ibox_fops, 0444, },
+ { "wbox", &spufs_wbox_fops, 0222, },
+ { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+ { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+ { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+ { "signal1", &spufs_signal1_fops, 0666, },
+ { "signal2", &spufs_signal2_fops, 0666, },
+ { "signal1_type", &spufs_signal1_type, 0666, },
+ { "signal2_type", &spufs_signal2_type, 0666, },
+ { "cntl", &spufs_cntl_fops, 0666, },
+ { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), },
+ { "lslr", &spufs_lslr_ops, 0444, },
+ { "mfc", &spufs_mfc_fops, 0666, },
+ { "mss", &spufs_mss_fops, 0666, },
+ { "npc", &spufs_npc_ops, 0666, },
+ { "srr0", &spufs_srr0_ops, 0666, },
+ { "decr", &spufs_decr_ops, 0666, },
+ { "decr_status", &spufs_decr_status_ops, 0666, },
+ { "event_mask", &spufs_event_mask_ops, 0666, },
+ { "event_status", &spufs_event_status_ops, 0444, },
+ { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+ { "phys-id", &spufs_id_ops, 0666, },
+ { "object-id", &spufs_object_id_ops, 0666, },
+ { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), },
+ { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), },
+ { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), },
+ { "dma_info", &spufs_dma_info_fops, 0444,
+ sizeof(struct spu_dma_info), },
+ { "proxydma_info", &spufs_proxydma_info_fops, 0444,
+ sizeof(struct spu_proxydma_info)},
+ { "tid", &spufs_tid_fops, 0444, },
+ { "stat", &spufs_stat_fops, 0444, },
+ { "switch_log", &spufs_switch_log_fops, 0444 },
+ {},
+};
+
+/*
+ * Files populated into a context directory created with
+ * SPU_CREATE_NOSCHED. Same entry layout as spufs_dir_contents; the list
+ * ends with an empty entry.
+ */
+const struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+ { "capabilities", &spufs_caps_fops, 0444, },
+ { "mem", &spufs_mem_fops, 0666, LS_SIZE, },
+ { "mbox", &spufs_mbox_fops, 0444, },
+ { "ibox", &spufs_ibox_fops, 0444, },
+ { "wbox", &spufs_wbox_fops, 0222, },
+ { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), },
+ { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), },
+ { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), },
+ { "signal1", &spufs_signal1_nosched_fops, 0222, },
+ { "signal2", &spufs_signal2_nosched_fops, 0222, },
+ { "signal1_type", &spufs_signal1_type, 0666, },
+ { "signal2_type", &spufs_signal2_type, 0666, },
+ { "mss", &spufs_mss_fops, 0666, },
+ { "mfc", &spufs_mfc_fops, 0666, },
+ { "cntl", &spufs_cntl_fops, 0666, },
+ { "npc", &spufs_npc_ops, 0666, },
+ { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, },
+ { "phys-id", &spufs_id_ops, 0666, },
+ { "object-id", &spufs_object_id_ops, 0666, },
+ { "tid", &spufs_tid_fops, 0444, },
+ { "stat", &spufs_stat_fops, 0444, },
+ {},
+};
+
+/*
+ * Extra files added to a context directory when the superblock's debug
+ * option is set (see spufs_mkdir).
+ */
+const struct spufs_tree_descr spufs_dir_debug_contents[] = {
+ { ".ctx", &spufs_ctx_fops, 0444, },
+ {},
+};
+
+/*
+ * Table of per-context files exposed to the coredump code. Each entry
+ * gives a name, either a read routine or a get routine (the other is
+ * NULL), and a size; the list ends with a NULL name.
+ * NOTE(review): the size of 19 used with the get routines looks like the
+ * length of a formatted number - confirm against the coredump consumer.
+ */
+const struct spufs_coredump_reader spufs_coredump_read[] = {
+ { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])},
+ { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) },
+ { "lslr", NULL, spufs_lslr_get, 19 },
+ { "decr", NULL, spufs_decr_get, 19 },
+ { "decr_status", NULL, spufs_decr_status_get, 19 },
+ { "mem", __spufs_mem_read, NULL, LS_SIZE, },
+ { "signal1", __spufs_signal1_read, NULL, sizeof(u32) },
+ { "signal1_type", NULL, spufs_signal1_type_get, 19 },
+ { "signal2", __spufs_signal2_read, NULL, sizeof(u32) },
+ { "signal2_type", NULL, spufs_signal2_type_get, 19 },
+ { "event_mask", NULL, spufs_event_mask_get, 19 },
+ { "event_status", NULL, spufs_event_status_get, 19 },
+ { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) },
+ { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) },
+ { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)},
+ { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)},
+ { "proxydma_info", __spufs_proxydma_info_read,
+ NULL, sizeof(struct spu_proxydma_info)},
+ { "object-id", NULL, spufs_object_id_get, 19 },
+ { "npc", NULL, spufs_npc_get, 19 },
+ { NULL },
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/gang.c b/kernel/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 000000000..71a443253
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,87 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed gang and initialise its refcount, mutexes and lists.
+ * Returns the new gang (holding one reference) or NULL if allocation fails.
+ */
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang = kzalloc(sizeof *gang, GFP_KERNEL);
+
+	if (gang) {
+		kref_init(&gang->kref);
+		mutex_init(&gang->mutex);
+		mutex_init(&gang->aff_mutex);
+		INIT_LIST_HEAD(&gang->list);
+		INIT_LIST_HEAD(&gang->aff_list_head);
+	}
+
+	return gang;
+}
+
+/*
+ * kref release callback: free the gang. Warns if it still has contexts
+ * counted or listed at that point.
+ */
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang = container_of(kref, struct spu_gang, kref);
+
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+/* Take an additional reference on gang and return it. */
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+ kref_get(&gang->kref);
+ return gang;
+}
+
+/*
+ * Drop a reference on gang; destroy_spu_gang() runs when the last one
+ * goes away. Returns the result of kref_put().
+ */
+int put_spu_gang(struct spu_gang *gang)
+{
+ return kref_put(&gang->kref, &destroy_spu_gang);
+}
+
+/*
+ * Attach ctx to gang under gang->mutex: the context takes a reference on
+ * the gang, is linked into gang->list and the context count is bumped.
+ */
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+ mutex_lock(&gang->mutex);
+ ctx->gang = get_spu_gang(gang);
+ list_add(&ctx->gang_list, &gang->list);
+ gang->contexts++;
+ mutex_unlock(&gang->mutex);
+}
+
+/*
+ * Detach ctx from gang under gang->mutex and then drop the reference the
+ * context held on the gang. Warns if ctx does not belong to gang.
+ */
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+ mutex_lock(&gang->mutex);
+ WARN_ON(ctx->gang != gang);
+ /* leaving the affinity list invalidates AFF_OFFSETS_SET */
+ if (!list_empty(&ctx->aff_list)) {
+ list_del_init(&ctx->aff_list);
+ gang->aff_flags &= ~AFF_OFFSETS_SET;
+ }
+ list_del_init(&ctx->gang_list);
+ gang->contexts--;
+ mutex_unlock(&gang->mutex);
+
+ /* done outside the mutex: this may free the gang */
+ put_spu_gang(gang);
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c b/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 000000000..8655c4cbe
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,349 @@
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Read one word from the problem-state mailbox register pu_mb_R of the
+ * SPU that ctx is bound to.
+ *
+ * Returns 4 and stores the word in *data when the low byte of mb_stat_R
+ * is non-zero; otherwise returns 0 and leaves *data untouched.
+ */
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mbox_stat;
+ int ret = 0;
+
+ spin_lock_irq(&spu->register_lock);
+ mbox_stat = in_be32(&prob->mb_stat_R);
+ if (mbox_stat & 0x0000ff) {
+ *data = in_be32(&prob->pu_mb_R);
+ ret = 4;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+/* Return the raw mailbox status register (mb_stat_R) of the bound SPU. */
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->mb_stat_R);
+}
+
+/*
+ * Poll the mailbox status of the SPU that ctx is bound to.
+ *
+ * @events selects what the caller is interested in (POLLIN/POLLRDNORM
+ * and/or POLLOUT/POLLWRNORM). For each requested class that is already
+ * satisfied according to mb_stat_R the matching bits are set in the
+ * returned mask; for each class that is not, the corresponding class 2
+ * interrupt is acknowledged and enabled so that the caller gets woken.
+ *
+ * The readable and writable bits are accumulated independently, so a
+ * caller asking for both gets both when both are available.
+ */
+static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx,
+					  unsigned int events)
+{
+	struct spu *spu = ctx->spu;
+	int ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&spu->register_lock);
+	stat = in_be32(&spu->problem->mb_stat_R);
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= POLLIN | POLLRDNORM;
+		else {
+			spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+			spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		}
+	}
+	if (events & (POLLOUT | POLLWRNORM)) {
+		if (stat & 0x00ff00)
+			/* OR in, so a readable result above is not lost */
+			ret |= POLLOUT | POLLWRNORM;
+		else {
+			spu_int_stat_clear(spu, 2,
+				CLASS2_MAILBOX_THRESHOLD_INTR);
+			spu_int_mask_or(spu, 2,
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		}
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+/*
+ * Read one word from the interrupting mailbox register (puint_mb_R).
+ *
+ * Returns 4 and stores the word in *data when mb_stat_R bits 0xff0000
+ * are non-zero; otherwise enables the class 2 mailbox interrupt and
+ * returns 0.
+ */
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int ret;
+
+ spin_lock_irq(&spu->register_lock);
+ if (in_be32(&prob->mb_stat_R) & 0xff0000) {
+ /* read the first available word */
+ *data = in_be64(&priv2->puint_mb_R);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt */
+ spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+ ret = 0;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+/*
+ * Write one word to the SPU's inbound mailbox register (spu_mb_W).
+ *
+ * Returns 4 when mb_stat_R bits 0x00ff00 are non-zero and the word was
+ * written; otherwise enables the class 2 mailbox threshold interrupt and
+ * returns 0.
+ */
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_problem __iomem *prob = spu->problem;
+ int ret;
+
+ spin_lock_irq(&spu->register_lock);
+ if (in_be32(&prob->mb_stat_R) & 0x00ff00) {
+ /* we have space to write wbox_data to */
+ out_be32(&prob->spu_mb_W, data);
+ ret = 4;
+ } else {
+ /* make sure we get woken up by the interrupt when space
+ becomes available */
+ spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+ ret = 0;
+ }
+ spin_unlock_irq(&spu->register_lock);
+ return ret;
+}
+
+/* Write data to the signal_notify1 register of the bound SPU. */
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+ out_be32(&ctx->spu->problem->signal_notify1, data);
+}
+
+/* Write data to the signal_notify2 register of the bound SPU. */
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+ out_be32(&ctx->spu->problem->signal_notify2, data);
+}
+
+/*
+ * Set (val != 0) or clear (val == 0) bit 0 of spu_cfg_RW, which is the
+ * bit spu_hw_signal1_type_get() reports. Read-modify-write is done under
+ * register_lock.
+ */
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 tmp;
+
+ spin_lock_irq(&spu->register_lock);
+ tmp = in_be64(&priv2->spu_cfg_RW);
+ if (val)
+ tmp |= 1;
+ else
+ tmp &= ~1;
+ out_be64(&priv2->spu_cfg_RW, tmp);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 0 of spu_cfg_RW is set, else 0. */
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+ return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0);
+}
+
+/*
+ * Set (val != 0) or clear (val == 0) bit 1 of spu_cfg_RW, which is the
+ * bit spu_hw_signal2_type_get() reports. Read-modify-write is done under
+ * register_lock.
+ */
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+ struct spu *spu = ctx->spu;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 tmp;
+
+ spin_lock_irq(&spu->register_lock);
+ tmp = in_be64(&priv2->spu_cfg_RW);
+ if (val)
+ tmp |= 2;
+ else
+ tmp &= ~2;
+ out_be64(&priv2->spu_cfg_RW, tmp);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 1 of spu_cfg_RW is set, else 0. */
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+ return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0);
+}
+
+/* Return the spu_npc_RW register of the bound SPU. */
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_npc_RW);
+}
+
+/* Write val to the spu_npc_RW register of the bound SPU. */
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+ out_be32(&ctx->spu->problem->spu_npc_RW, val);
+}
+
+/* Return the spu_status_R register of the bound SPU. */
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_status_R);
+}
+
+/* Return the local store pointer of the bound SPU. */
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+ return ctx->spu->local_store;
+}
+
+/*
+ * Write val to the spu_privcntl_RW register of the bound SPU. Takes no
+ * lock itself; spu_hw_runcntl_write() calls it with register_lock held.
+ */
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+ out_be64(&ctx->spu->priv2->spu_privcntl_RW, val);
+}
+
+/* Return the spu_runcntl_RW register of the bound SPU. */
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->spu_runcntl_RW);
+}
+
+/*
+ * Write val to spu_runcntl_RW under register_lock. When val requests
+ * SPU_RUNCNTL_ISOLATE, the load-request-enable bit is written to the
+ * privileged control register first.
+ */
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+ spin_lock_irq(&ctx->spu->register_lock);
+ if (val & SPU_RUNCNTL_ISOLATE)
+ spu_hw_privcntl_write(ctx,
+ SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+ out_be32(&ctx->spu->problem->spu_runcntl_RW, val);
+ spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+/*
+ * Write SPU_RUNCNTL_STOP and busy-wait, with register_lock held, until
+ * SPU_STATUS_RUNNING is no longer set in spu_status_R.
+ */
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+ spin_lock_irq(&ctx->spu->register_lock);
+ out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING)
+ cpu_relax();
+ spin_unlock_irq(&ctx->spu->register_lock);
+}
+
+/* Set MFC_STATE1_MASTER_RUN_CONTROL_MASK in MFC SR1 under register_lock. */
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+ struct spu *spu = ctx->spu;
+ u64 sr1;
+
+ spin_lock_irq(&spu->register_lock);
+ sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, sr1);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/* Clear MFC_STATE1_MASTER_RUN_CONTROL_MASK in MFC SR1 under register_lock. */
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+ struct spu *spu = ctx->spu;
+ u64 sr1;
+
+ spin_lock_irq(&spu->register_lock);
+ sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, sr1);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Program a DMA tag query: write mask to dma_querymask_RW and mode to
+ * dma_querytype_RW.
+ *
+ * Returns 0 on success, or -EAGAIN (without writing anything) when
+ * dma_querytype_RW currently reads non-zero.
+ */
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+ struct spu_problem __iomem *prob = ctx->spu->problem;
+ int ret;
+
+ spin_lock_irq(&ctx->spu->register_lock);
+ ret = -EAGAIN;
+ if (in_be32(&prob->dma_querytype_RW))
+ goto out;
+ ret = 0;
+ out_be32(&prob->dma_querymask_RW, mask);
+ out_be32(&prob->dma_querytype_RW, mode);
+out:
+ spin_unlock_irq(&ctx->spu->register_lock);
+ return ret;
+}
+
+/* Return the dma_tagstatus_R register of the bound SPU. */
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+ return in_be32(&ctx->spu->problem->dma_tagstatus_R);
+}
+
+/* Return the dma_qstatus_R register of the bound SPU. */
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+ return in_be32(&ctx->spu->problem->dma_qstatus_R);
+}
+
+/*
+ * Submit one MFC DMA command by writing its fields to the problem-state
+ * MFC registers, then read back the class/cmd register as status.
+ *
+ * The low 16 bits of the status map to: 0 -> 0 (success), 2 -> -EAGAIN,
+ * anything else -> -EINVAL.
+ */
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+ struct mfc_dma_command *cmd)
+{
+ u32 status;
+ struct spu_problem __iomem *prob = ctx->spu->problem;
+
+ /* the whole register sequence is done atomically under the lock */
+ spin_lock_irq(&ctx->spu->register_lock);
+ out_be32(&prob->mfc_lsa_W, cmd->lsa);
+ out_be64(&prob->mfc_ea_W, cmd->ea);
+ out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+ cmd->size << 16 | cmd->tag);
+ out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+ cmd->class << 16 | cmd->cmd);
+ status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+ spin_unlock_irq(&ctx->spu->register_lock);
+
+ switch (status & 0xffff) {
+ case 0:
+ return 0;
+ case 2:
+ return -EAGAIN;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Write MFC_CNTL_RESTART_DMA_COMMAND to mfc_control_RW, unless
+ * SPU_CONTEXT_SWITCH_PENDING is set in the SPU's flags.
+ */
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+ struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+ if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+/*
+ * Context operations that act directly on the registers of the SPU a
+ * context is bound to (every handler dereferences ctx->spu).
+ */
+struct spu_context_ops spu_hw_ops = {
+ .mbox_read = spu_hw_mbox_read,
+ .mbox_stat_read = spu_hw_mbox_stat_read,
+ .mbox_stat_poll = spu_hw_mbox_stat_poll,
+ .ibox_read = spu_hw_ibox_read,
+ .wbox_write = spu_hw_wbox_write,
+ .signal1_write = spu_hw_signal1_write,
+ .signal2_write = spu_hw_signal2_write,
+ .signal1_type_set = spu_hw_signal1_type_set,
+ .signal1_type_get = spu_hw_signal1_type_get,
+ .signal2_type_set = spu_hw_signal2_type_set,
+ .signal2_type_get = spu_hw_signal2_type_get,
+ .npc_read = spu_hw_npc_read,
+ .npc_write = spu_hw_npc_write,
+ .status_read = spu_hw_status_read,
+ .get_ls = spu_hw_get_ls,
+ .privcntl_write = spu_hw_privcntl_write,
+ .runcntl_read = spu_hw_runcntl_read,
+ .runcntl_write = spu_hw_runcntl_write,
+ .runcntl_stop = spu_hw_runcntl_stop,
+ .master_start = spu_hw_master_start,
+ .master_stop = spu_hw_master_stop,
+ .set_mfc_query = spu_hw_set_mfc_query,
+ .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
+ .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
+ .send_mfc_command = spu_hw_send_mfc_command,
+ .restart_dma = spu_hw_restart_dma,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/inode.c b/kernel/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 000000000..1ba6307be
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,811 @@
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+
+#include <asm/prom.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+/* Per-superblock spufs state, stored in sb->s_fs_info. */
+struct spufs_sb_info {
+ /* non-zero: spufs_mkdir also creates the debug files (".ctx") */
+ int debug;
+};
+
+/* Slab cache that struct spufs_inode_info objects are allocated from. */
+static struct kmem_cache *spufs_inode_cache;
+/* Must be non-NULL for SPU_CREATE_ISOLATE contexts to be created. */
+char *isolated_loader;
+static int isolated_loader_size;
+
+/* Return the spufs-private info attached to a superblock. */
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+/*
+ * Allocate a spufs inode from the slab cache with no gang, no context and
+ * no openers. Returns the embedded VFS inode, or NULL on failure.
+ */
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+	struct spufs_inode_info *info;
+
+	info = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	info->i_openers = 0;
+	info->i_ctx = NULL;
+	info->i_gang = NULL;
+
+	return &info->vfs_inode;
+}
+
+/* RCU callback: give the spufs inode containing this rcu_head back to the slab cache. */
+static void spufs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
+}
+
+/* Free an inode after an RCU grace period (see spufs_i_callback). */
+static void spufs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
+/*
+ * One-time initialiser for a spufs_inode_info object: sets up the
+ * embedded VFS inode. Presumably registered as the slab constructor for
+ * spufs_inode_cache - the registration is not visible here.
+ */
+static void
+spufs_init_once(void *p)
+{
+ struct spufs_inode_info *ei = p;
+
+ inode_init_once(&ei->vfs_inode);
+}
+
+/*
+ * Allocate a new inode on sb with the given mode, owned by the caller's
+ * fsuid/fsgid and stamped with the current time.
+ * Returns the inode, or NULL if allocation fails.
+ */
+static struct inode *
+spufs_new_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode->i_mode = mode;
+		inode->i_uid = current_fsuid();
+		inode->i_gid = current_fsgid();
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	}
+
+	return inode;
+}
+
+/*
+ * ->setattr for spufs files: attributes are copied to the inode, but any
+ * attempt to change the file size is rejected with -EINVAL.
+ */
+static int
+spufs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size != inode->i_size)
+			return -EINVAL;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
+}
+
+
+/*
+ * Create a regular file inode for dentry, wired to fops and holding a
+ * reference on ctx, and add it to the dcache.
+ *
+ * Returns 0 on success or -ENOSPC if no inode could be allocated (in
+ * which case dentry is left untouched).
+ */
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+		const struct file_operations *fops, umode_t mode,
+		size_t size, struct spu_context *ctx)
+{
+	static const struct inode_operations spufs_file_iops = {
+		.setattr = spufs_setattr,
+	};
+	struct inode *inode;
+
+	inode = spufs_new_inode(sb, S_IFREG | mode);
+	if (!inode)
+		return -ENOSPC;
+
+	inode->i_op = &spufs_file_iops;
+	inode->i_fop = fops;
+	inode->i_size = size;
+
+	/* the inode keeps its own reference on the context */
+	SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	inode->i_private = SPUFS_I(inode)->i_ctx;
+
+	d_add(dentry, inode);
+	return 0;
+}
+
+/*
+ * ->evict_inode: clear the VFS inode and drop the context and gang
+ * references the inode was holding, if any.
+ */
+static void
+spufs_evict_inode(struct inode *inode)
+{
+ struct spufs_inode_info *ei = SPUFS_I(inode);
+ clear_inode(inode);
+ if (ei->i_ctx)
+ put_spu_context(ei->i_ctx);
+ if (ei->i_gang)
+ put_spu_gang(ei->i_gang);
+}
+
+/*
+ * Unlink every hashed, positive child of dir and shrink its dcache
+ * subtree. Takes the directory inode's i_mutex itself, so the caller
+ * must not already hold it.
+ */
+static void spufs_prune_dir(struct dentry *dir)
+{
+ struct dentry *dentry, *tmp;
+
+ mutex_lock(&d_inode(dir)->i_mutex);
+ list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
+ spin_lock(&dentry->d_lock);
+ if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) {
+ /* pin and unhash under d_lock, unlink after dropping it */
+ dget_dlock(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+ simple_unlink(d_inode(dir), dentry);
+ /* XXX: what was dcache_lock protecting here? Other
+ * filesystems (IB, configfs) release dcache_lock
+ * before unlink */
+ dput(dentry);
+ } else {
+ spin_unlock(&dentry->d_lock);
+ }
+ }
+ shrink_dcache_parent(dir);
+ mutex_unlock(&d_inode(dir)->i_mutex);
+}
+
+/*
+ * Remove a context directory: prune its files, unhash and remove the
+ * directory itself, then let the context forget its mm.
+ * Returns the result of simple_rmdir().
+ *
+ * Caller must hold parent->i_mutex and must NOT hold the i_mutex of the
+ * directory being removed (spufs_prune_dir takes it).
+ */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+ /* remove all entries */
+ int res;
+ spufs_prune_dir(dir);
+ d_drop(dir);
+ res = simple_rmdir(parent, dir);
+ /* We have to give up the mm_struct */
+ spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
+ return res;
+}
+
+/*
+ * Populate directory dir with one file per entry of files (a list ended
+ * by an entry with a NULL or empty name). Each file's mode is the entry's
+ * mode masked with @mode, and each file is bound to ctx.
+ *
+ * Returns 0 on success, -ENOMEM if a dentry cannot be allocated, or the
+ * error from spufs_new_file(). Files created before a failure are left in
+ * place for the caller to clean up.
+ */
+static int spufs_fill_dir(struct dentry *dir,
+		const struct spufs_tree_descr *files, umode_t mode,
+		struct spu_context *ctx)
+{
+	while (files->name && files->name[0]) {
+		int ret;
+		struct dentry *dentry = d_alloc_name(dir, files->name);
+		if (!dentry)
+			return -ENOMEM;
+		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+					files->mode & mode, files->size, ctx);
+		if (ret) {
+			/* spufs_new_file did not attach an inode; drop the
+			 * reference from d_alloc_name so it is not leaked */
+			dput(dentry);
+			return ret;
+		}
+		files++;
+	}
+	return 0;
+}
+
+/*
+ * ->release for a context directory (see spufs_context_fops): closing
+ * the directory removes it, with the parent's i_mutex held across the
+ * removal. A failing removal only triggers a warning.
+ *
+ * Returns the result of dcache_dir_close().
+ */
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+	struct inode *parent;
+	struct dentry *dir;
+	int ret;
+
+	dir = file->f_path.dentry;
+	parent = d_inode(dir->d_parent);
+
+	mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+	ret = spufs_rmdir(parent, dir);
+	mutex_unlock(&parent->i_mutex);
+	WARN_ON(ret);
+
+	return dcache_dir_close(inode, file);
+}
+
+/*
+ * File operations installed on a context directory's file by
+ * spufs_context_open(); dcache directory helpers except for ->release,
+ * which removes the directory.
+ */
+const struct file_operations spufs_context_fops = {
+ .open = dcache_dir_open,
+ .release = spufs_dir_close,
+ .llseek = dcache_dir_lseek,
+ .read = generic_read_dir,
+ .iterate = dcache_readdir,
+ .fsync = noop_fsync,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+/*
+ * Create a context directory: allocate the inode and a new SPU context
+ * (in the gang of @dir, if any), instantiate @dentry and populate it with
+ * the regular or NOSCHED file set, plus the debug files when the
+ * superblock has debug enabled.
+ *
+ * Returns 0 on success, -ENOSPC if the inode or context cannot be
+ * allocated, or the error from spufs_fill_dir(); in the latter case the
+ * partly built directory is removed again.
+ */
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		umode_t mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_context *ctx;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	if (dir->i_mode & S_ISGID) {
+		/* inherit group and setgid bit; OR it in so the type and
+		 * permission bits set above are preserved */
+		inode->i_gid = dir->i_gid;
+		inode->i_mode |= S_ISGID;
+	}
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+	SPUFS_I(inode)->i_ctx = ctx;
+	if (!ctx) {
+		iput(inode);
+		return -ENOSPC;
+	}
+
+	ctx->flags = flags;
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	mutex_lock(&inode->i_mutex);
+
+	dget(dentry);
+	inc_nlink(dir);
+	inc_nlink(inode);
+
+	d_instantiate(dentry, inode);
+
+	if (flags & SPU_CREATE_NOSCHED)
+		ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents,
+					 mode, ctx);
+	else
+		ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx);
+
+	if (!ret && spufs_get_sb_info(dir->i_sb)->debug)
+		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+				mode, ctx);
+
+	mutex_unlock(&inode->i_mutex);
+
+	/* Must run after dropping inode->i_mutex: spufs_rmdir() prunes
+	 * the directory, which takes that same mutex. */
+	if (ret)
+		spufs_rmdir(dir, dentry);
+
+	return ret;
+}
+
+/*
+ * Open the context directory at path and install it on a new file
+ * descriptor with spufs_context_fops as its operations.
+ * Returns the descriptor, or a negative errno.
+ */
+static int spufs_context_open(struct path *path)
+{
+	struct file *filp;
+	int fd;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		/* give the reserved descriptor back */
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &spufs_context_fops;
+	fd_install(fd, filp);
+	return fd;
+}
+
+/*
+ * Validate an affinity request for a context about to be created in gang.
+ *
+ * Returns the neighbor context (with a reference taken, which the caller
+ * must drop) when SPU_CREATE_AFFINITY_SPU is requested, NULL when only
+ * memory affinity is requested, or an ERR_PTR:
+ *   -EINVAL  no affinity support, SPU_CREATE_GANG set, filp is not a
+ *            context directory, or the neighbor is in a different gang
+ *   -EEXIST  the gang already has a memory-affinity reference context,
+ *            the neighbor is not at an end of its chain, or no node has
+ *            enough unreserved SPUs
+ *   -EBUSY   the gang's affinity lists were already merged
+ */
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+ struct file *filp)
+{
+ struct spu_context *tmp, *neighbor, *err;
+ int count, node;
+ int aff_supp;
+
+ /* affinity is supported if the first SPU of node 0 is on an aff_list */
+ aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+ struct spu, cbe_list))->aff_list);
+
+ if (!aff_supp)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & SPU_CREATE_GANG)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & SPU_CREATE_AFFINITY_MEM &&
+ gang->aff_ref_ctx &&
+ gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+ return ERR_PTR(-EEXIST);
+
+ if (gang->aff_flags & AFF_MERGED)
+ return ERR_PTR(-EBUSY);
+
+ neighbor = NULL;
+ if (flags & SPU_CREATE_AFFINITY_SPU) {
+ if (!filp || filp->f_op != &spufs_context_fops)
+ return ERR_PTR(-EINVAL);
+
+ neighbor = get_spu_context(
+ SPUFS_I(file_inode(filp))->i_ctx);
+
+ /* neighbor already linked, not a head, not last, and not followed by a head */
+ if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+ !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+ !list_entry(neighbor->aff_list.next, struct spu_context,
+ aff_list)->aff_head) {
+ err = ERR_PTR(-EEXIST);
+ goto out_put_neighbor;
+ }
+
+ if (gang != neighbor->gang) {
+ err = ERR_PTR(-EINVAL);
+ goto out_put_neighbor;
+ }
+
+ /* contexts with affinity after this one is added */
+ count = 1;
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ count++;
+ if (list_empty(&neighbor->aff_list))
+ count++;
+
+ /* look for a node with at least that many unreserved SPUs */
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ if ((cbe_spu_info[node].n_spus - atomic_read(
+ &cbe_spu_info[node].reserved_spus)) >= count)
+ break;
+ }
+
+ if (node == MAX_NUMNODES) {
+ err = ERR_PTR(-EEXIST);
+ goto out_put_neighbor;
+ }
+ }
+
+ return neighbor;
+
+out_put_neighbor:
+ put_spu_context(neighbor);
+ return err;
+}
+
+/*
+ * Record the affinity of a freshly created context ctx in its gang.
+ *
+ * With SPU_CREATE_AFFINITY_MEM, ctx becomes the gang's reference context.
+ * With SPU_CREATE_AFFINITY_SPU, ctx is linked next to neighbor on the
+ * gang's affinity list (neighbor is added first if it was not yet on it),
+ * and ctx becomes the reference context if the gang has none.
+ */
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+ struct spu_context *neighbor)
+{
+ if (flags & SPU_CREATE_AFFINITY_MEM)
+ ctx->gang->aff_ref_ctx = ctx;
+
+ if (flags & SPU_CREATE_AFFINITY_SPU) {
+ if (list_empty(&neighbor->aff_list)) {
+ list_add_tail(&neighbor->aff_list,
+ &ctx->gang->aff_list_head);
+ neighbor->aff_head = 1;
+ }
+
+ if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+ || list_entry(neighbor->aff_list.next, struct spu_context,
+ aff_list)->aff_head) {
+ /* neighbor ends its chain: insert ctx right after it */
+ list_add(&ctx->aff_list, &neighbor->aff_list);
+ } else {
+ /* otherwise insert ctx before neighbor, taking over the head mark */
+ list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+ if (neighbor->aff_head) {
+ neighbor->aff_head = 0;
+ ctx->aff_head = 1;
+ }
+ }
+
+ if (!ctx->gang->aff_ref_ctx)
+ ctx->gang->aff_ref_ctx = ctx;
+ }
+}
+
+/*
+ * Create a new SPU context directory at dentry and open it.
+ *
+ * Checks the creation flags (NOSCHED needs CAP_SYS_NICE, ISOLATE requires
+ * NOSCHED and a loaded isolated_loader), validates and records affinity
+ * when requested, creates the directory and finally opens it.
+ *
+ * Returns the new file descriptor on success or a negative errno. Any
+ * reference taken on the affinity neighbor is dropped on every path.
+ */
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+			struct vfsmount *mnt, int flags, umode_t mode,
+			struct file *aff_filp)
+{
+	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
+	struct path path = {.mnt = mnt, .dentry = dentry};
+
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		return -EINVAL;
+
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		return -ENODEV;
+
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		if (!gang)
+			return -EINVAL;
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
+	if (ret) {
+		/* spufs_assert_affinity took a reference on neighbor;
+		 * don't leak it when directory creation fails */
+		if (neighbor)
+			put_spu_context(neighbor);
+		goto out_aff_unlock;
+	}
+
+	if (affinity) {
+		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
+								neighbor);
+		if (neighbor)
+			put_spu_context(neighbor);
+	}
+
+	ret = spufs_context_open(&path);
+	if (ret < 0)
+		WARN_ON(spufs_rmdir(inode, dentry));
+
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
+	return ret;
+}
+
+/*
+ * Create the directory inode for a new gang and attach it to @dentry.
+ *
+ * @dir:    parent directory
+ * @dentry: dentry that receives the new inode
+ * @mode:   permission bits for the directory (S_IFDIR is added here)
+ *
+ * Returns 0 on success, -ENOSPC if no inode could be allocated, or
+ * -ENOMEM if the gang itself could not be allocated.
+ */
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	int ret;
+	struct inode *inode;
+	struct spu_gang *gang;
+
+	ret = -ENOSPC;
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		goto out;
+
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		/*
+		 * Propagate the setgid bit. This must OR the bit in: masking
+		 * with S_ISGID would strip S_IFDIR and all permission bits.
+		 */
+		inode->i_mode |= S_ISGID;
+	}
+
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang) {
+		/* Report the failure instead of returning a stale 0. */
+		ret = -ENOMEM;
+		goto out_iput;
+	}
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+
+	d_instantiate(dentry, inode);
+	inc_nlink(dir);
+	inc_nlink(d_inode(dentry));
+	return 0;
+
+out_iput:
+	iput(inode);
+out:
+	return ret;
+}
+
+/*
+ * Open the gang directory at @path and install it on a new file descriptor.
+ *
+ * Returns the descriptor number, or a negative errno if either the
+ * descriptor reservation or the open fails.
+ */
+static int spufs_gang_open(struct path *path)
+{
+	struct file *gang_file;
+	int fd;
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		return fd;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	gang_file = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(gang_file)) {
+		/* Give the reserved descriptor back before bailing out. */
+		put_unused_fd(fd);
+		return PTR_ERR(gang_file);
+	}
+
+	gang_file->f_op = &simple_dir_operations;
+	fd_install(fd, gang_file);
+
+	return fd;
+}
+
+/*
+ * Create a gang directory and open it.
+ *
+ * Returns the value of spufs_gang_open() (a file descriptor on success) or
+ * the error from spufs_mkgang(). If the open fails, the freshly created
+ * directory is removed again.
+ */
+static int spufs_create_gang(struct inode *inode,
+			struct dentry *dentry,
+			struct vfsmount *mnt, umode_t mode)
+{
+	struct path path = {.mnt = mnt, .dentry = dentry};
+	int ret;
+
+	ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO);
+	if (ret)
+		return ret;
+
+	ret = spufs_gang_open(&path);
+	if (ret < 0)
+		WARN_ON(simple_rmdir(inode, dentry));
+
+	return ret;
+}
+
+
+/*
+ * Forward declaration: spufs_create() below compares a superblock's type
+ * against this object before its full definition appears.
+ */
+static struct file_system_type spufs_type;
+
+/*
+ * Entry point for creating a gang or a context directory.
+ *
+ * @path:   parent directory; must live on spufs
+ * @dentry: dentry for the object to create
+ * @flags:  SPU_CREATE_* flags; unknown bits are rejected
+ * @mode:   requested mode, filtered through the caller's umask
+ * @filp:   passed through to spufs_create_context()
+ *
+ * Returns the result of the gang/context creation helper (non-negative on
+ * success) or -EINVAL if one of the sanity checks fails.
+ */
+long spufs_create(struct path *path, struct dentry *dentry,
+		unsigned int flags, umode_t mode, struct file *filp)
+{
+	struct dentry *parent = path->dentry;
+	struct inode *dir = d_inode(parent);
+	int want_gang = flags & SPU_CREATE_GANG;
+	int ret;
+
+	/* check if we are on spufs */
+	if (parent->d_sb->s_type != &spufs_type)
+		return -EINVAL;
+
+	/* don't accept undefined flags */
+	if (flags & (~SPU_CREATE_FLAG_ALL))
+		return -EINVAL;
+
+	/* only threads can be underneath a gang */
+	if (parent != parent->d_sb->s_root) {
+		if (want_gang || !SPUFS_I(dir)->i_gang)
+			return -EINVAL;
+	}
+
+	mode &= ~current_umask();
+
+	if (want_gang)
+		ret = spufs_create_gang(dir, dentry, path->mnt, mode);
+	else
+		ret = spufs_create_context(dir, dentry, path->mnt, flags, mode,
+					    filp);
+
+	if (ret >= 0)
+		fsnotify_mkdir(dir, dentry);
+
+	return ret;
+}
+
+/* File system initialization */
+/* Token ids returned by match_token() for the mount options below. */
+enum {
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
+};
+
+/*
+ * Mount option patterns: "uid=" and "gid=" take a decimal integer, "mode="
+ * takes an octal integer, "debug" takes no argument. The NULL pattern
+ * terminates the table and maps anything unrecognised to Opt_err.
+ */
+static const match_table_t spufs_tokens = {
+ { Opt_uid, "uid=%d" },
+ { Opt_gid, "gid=%d" },
+ { Opt_mode, "mode=%o" },
+ { Opt_debug, "debug" },
+ { Opt_err, NULL },
+};
+
+/*
+ * Apply the comma separated mount options in @options to @root / @sb.
+ *
+ * Note the unusual convention: returns 1 if every option was understood
+ * and valid, 0 otherwise. Empty options (e.g. ",,") are ignored.
+ */
+static int
+spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *opt;
+
+	for (opt = strsep(&options, ","); opt != NULL;
+	     opt = strsep(&options, ",")) {
+		int value;
+
+		if (*opt == '\0')
+			continue;
+
+		switch (match_token(opt, spufs_tokens, args)) {
+		case Opt_uid:
+			if (match_int(&args[0], &value))
+				return 0;
+			root->i_uid = make_kuid(current_user_ns(), value);
+			if (!uid_valid(root->i_uid))
+				return 0;
+			break;
+		case Opt_gid:
+			if (match_int(&args[0], &value))
+				return 0;
+			root->i_gid = make_kgid(current_user_ns(), value);
+			if (!gid_valid(root->i_gid))
+				return 0;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &value))
+				return 0;
+			root->i_mode = value | S_IFDIR;
+			break;
+		case Opt_debug:
+			spufs_get_sb_info(sb)->debug = 1;
+			break;
+		default:
+			/* Unknown option. */
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/* Release the pages that hold the copy of the isolated-mode loader. */
+static void spufs_exit_isolated_loader(void)
+{
+	unsigned long loader_addr = (unsigned long) isolated_loader;
+
+	free_pages(loader_addr, get_order(isolated_loader_size));
+}
+
+/*
+ * Copy the isolation loader out of the device tree, if there is one.
+ *
+ * Looks up the "loader" property of the "/spu-isolation" node and copies
+ * it into freshly allocated pages. On success isolated_loader and
+ * isolated_loader_size are set; on any failure the function returns
+ * silently and isolated_loader stays NULL.
+ */
+static void
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn;
+	const char *loader;
+	int size;
+
+	dn = of_find_node_by_path("/spu-isolation");
+	if (!dn)
+		return;
+
+	loader = of_get_property(dn, "loader", &size);
+	if (!loader)
+		goto out_put;
+
+	/*
+	 * The loader must be aligned on a 16 byte boundary; allocating
+	 * whole pages takes care of that.
+	 */
+	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+	if (!isolated_loader)
+		goto out_put;
+
+	isolated_loader_size = size;
+	memcpy(isolated_loader, loader, size);
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+
+out_put:
+	/*
+	 * of_find_node_by_path() returned the node with its refcount raised;
+	 * drop it now that the property data is no longer needed.
+	 */
+	of_node_put(dn);
+}
+
+/*
+ * Build the root directory of a spufs superblock.
+ *
+ * Returns 0 on success, -ENODEV when no SPU management ops are registered,
+ * -EINVAL for bad mount options and -ENOMEM for allocation failures.
+ */
+static int
+spufs_create_root(struct super_block *sb, void *data)
+{
+	struct inode *root;
+
+	if (!spu_management_ops)
+		return -ENODEV;
+
+	root = spufs_new_inode(sb, S_IFDIR | 0775);
+	if (!root)
+		return -ENOMEM;
+
+	root->i_op = &simple_dir_inode_operations;
+	root->i_fop = &simple_dir_operations;
+	SPUFS_I(root)->i_ctx = NULL;
+	inc_nlink(root);
+
+	if (!spufs_parse_options(sb, data, root)) {
+		iput(root);
+		return -EINVAL;
+	}
+
+	/* No iput() on failure here, matching the original error path. */
+	sb->s_root = d_make_root(root);
+	if (!sb->s_root)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Initialise a new spufs superblock.
+ *
+ * @sb:     superblock to fill in
+ * @data:   raw mount option string
+ * @silent: unused
+ *
+ * Returns 0 on success or a negative errno. The per-superblock info
+ * allocated here is freed again if creating the root directory fails.
+ */
+static int
+spufs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct spufs_sb_info *info;
+	int ret;
+	static const struct super_operations s_ops = {
+		.alloc_inode = spufs_alloc_inode,
+		.destroy_inode = spufs_destroy_inode,
+		.statfs = simple_statfs,
+		.evict_inode = spufs_evict_inode,
+		.show_options = generic_show_options,
+	};
+
+	save_mount_options(sb, data);
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = SPUFS_MAGIC;
+	sb->s_op = &s_ops;
+	sb->s_fs_info = info;
+
+	ret = spufs_create_root(sb, data);
+	if (ret) {
+		/*
+		 * Nothing else frees s_fs_info for a superblock that never
+		 * got a root, so release it here and clear the pointer.
+		 */
+		sb->s_fs_info = NULL;
+		kfree(info);
+	}
+
+	return ret;
+}
+
+/*
+ * Mount callback for spufs: delegates to mount_single() with
+ * spufs_fill_super() as the superblock initialiser. @name is not used.
+ */
+static struct dentry *
+spufs_mount(struct file_system_type *fstype, int flags,
+ const char *name, void *data)
+{
+ return mount_single(fstype, flags, data, spufs_fill_super);
+}
+
+/*
+ * Filesystem type registered by spufs_init(); spufs_create() uses its
+ * address to recognise spufs superblocks.
+ */
+static struct file_system_type spufs_type = {
+ .owner = THIS_MODULE,
+ .name = "spufs",
+ .mount = spufs_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("spufs");
+
+/*
+ * Module init: set up the inode cache, the SPU scheduler, the syscall
+ * hooks and the filesystem type, in that order. Each failure unwinds the
+ * steps that already succeeded. The isolated loader is set up last and
+ * cannot fail the init.
+ *
+ * Returns 0 on success, -ENODEV without SPU management ops, -ENOMEM if the
+ * inode cache cannot be created, or the error of the failing step.
+ */
+static int __init spufs_init(void)
+{
+	int err;
+
+	if (!spu_management_ops)
+		return -ENODEV;
+
+	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+					      sizeof(struct spufs_inode_info), 0,
+					      SLAB_HWCACHE_ALIGN, spufs_init_once);
+	if (!spufs_inode_cache)
+		return -ENOMEM;
+
+	err = spu_sched_init();
+	if (err)
+		goto undo_cache;
+
+	err = register_spu_syscalls(&spufs_calls);
+	if (err)
+		goto undo_sched;
+
+	err = register_filesystem(&spufs_type);
+	if (err)
+		goto undo_syscalls;
+
+	spufs_init_isolated_loader();
+
+	return 0;
+
+undo_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
+undo_sched:
+	spu_sched_exit();
+undo_cache:
+	kmem_cache_destroy(spufs_inode_cache);
+	return err;
+}
+module_init(spufs_init);
+
+/*
+ * Module exit: stop the SPU scheduler, free the isolated loader copy,
+ * remove the syscall hooks, unregister the filesystem and finally destroy
+ * the inode cache.
+ */
+static void __exit spufs_exit(void)
+{
+ spu_sched_exit();
+ spufs_exit_isolated_loader();
+ unregister_spu_syscalls(&spufs_calls);
+ unregister_filesystem(&spufs_type);
+ kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+/* Module metadata. */
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 000000000..147069938
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,183 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed save area with vzalloc() and store it in @csa.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+	struct spu_lscsa *area;
+	unsigned long off;
+
+	area = vzalloc(sizeof(*area));
+	if (!area)
+		return -ENOMEM;
+	csa->lscsa = area;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (off = 0; off < LS_SIZE; off += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(area->ls + off));
+
+	return 0;
+}
+
+/*
+ * Undo spu_alloc_lscsa_std(): clear the reserved bit on every local store
+ * page and vfree() the save area. A NULL save area is ignored.
+ */
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+	struct spu_lscsa *area = csa->lscsa;
+	unsigned long off;
+
+	if (area == NULL)
+		return;
+
+	/* Clear reserved bit before vfree. */
+	for (off = 0; off < LS_SIZE; off += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(area->ls + off));
+
+	vfree(area);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+/* log2 of a 64K page. */
+#define SPU_64K_PAGE_SHIFT 16
+/* Allocation order of one 64K page expressed in base pages. */
+#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+/* Number of base pages that make up one 64K page. */
+#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER)
+
+/*
+ * Allocate the save area for @csa, preferring 64K pages.
+ *
+ * Tries to allocate SPU_LSCSA_NUM_BIG_PAGES physically contiguous 64K
+ * chunks and to vmap() them into one kernel mapping. If 64K pages are not
+ * available or any step fails, whatever was set up is released through
+ * spu_free_lscsa() and the vmalloc based spu_alloc_lscsa_std() is used.
+ *
+ * Returns 0 on success, or the result of spu_alloc_lscsa_std() on fallback.
+ */
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ struct page **pgarray;
+ unsigned char *p;
+ int i, j, n_4k;
+
+ /* Check availability of 64K pages */
+ if (!spu_64k_pages_available())
+ goto fail;
+
+ /* From here on spu_free_lscsa() takes the big-page release path. */
+ csa->use_big_pages = 1;
+
+ pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+ csa);
+
+ /* First try to allocate our 64K pages. We need 5 of them
+ * with the current implementation. In the future, we should try
+ * to separate the lscsa with the actual local store image, thus
+ * allowing us to require only 4 64K pages per context
+ */
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+ /* XXX This is likely to fail, we should use a special pool
+ * similar to what hugetlbfs does.
+ */
+ csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+ SPU_64K_PAGE_ORDER);
+ if (csa->lscsa_pages[i] == NULL)
+ goto fail;
+ }
+
+ pr_debug(" success ! creating vmap...\n");
+
+ /* Now we need to create a vmalloc mapping of these for the kernel
+ * and SPU context switch code to use. Currently, we stick to a
+ * normal kernel vmalloc mapping, which in our case will be 4K
+ */
+ n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+ /* Temporary array of base pages; only needed for the vmap() call. */
+ pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+ if (pgarray == NULL)
+ goto fail;
+ for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+ for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+ /* We assume all the struct page's are contiguous
+ * which should be hopefully the case for an order 4
+ * allocation..
+ */
+ pgarray[i * SPU_64K_PAGE_COUNT + j] =
+ csa->lscsa_pages[i] + j;
+ csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+ kfree(pgarray);
+ if (csa->lscsa == NULL)
+ goto fail;
+
+ memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+ /* Set LS pages reserved to allow for user-space mapping.
+ *
+ * XXX isn't that a bit obsolete ? I think we should just
+ * make sure the page count is high enough. Anyway, won't harm
+ * for now
+ */
+ for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+ SetPageReserved(vmalloc_to_page(p));
+
+ pr_debug(" all good !\n");
+
+ return 0;
+fail:
+ pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+ /* Releases any partially allocated big pages and clears use_big_pages. */
+ spu_free_lscsa(csa);
+ return spu_alloc_lscsa_std(csa);
+}
+
+/*
+ * Release the save area of @csa.
+ *
+ * A context that is not using big pages is handed to spu_free_lscsa_std().
+ * Otherwise the kernel mapping (if one was created) is torn down and every
+ * allocated 64K chunk is returned to the page allocator.
+ */
+void spu_free_lscsa(struct spu_state *csa)
+{
+	int idx;
+
+	if (!csa->use_big_pages) {
+		spu_free_lscsa_std(csa);
+		return;
+	}
+	csa->use_big_pages = 0;
+
+	if (csa->lscsa != NULL) {
+		unsigned char *ls_page;
+
+		for (ls_page = csa->lscsa->ls;
+		     ls_page < csa->lscsa->ls + LS_SIZE;
+		     ls_page += PAGE_SIZE)
+			ClearPageReserved(vmalloc_to_page(ls_page));
+
+		vunmap(csa->lscsa);
+		csa->lscsa = NULL;
+	}
+
+	for (idx = 0; idx < SPU_LSCSA_NUM_BIG_PAGES; idx++) {
+		if (csa->lscsa_pages[idx])
+			__free_pages(csa->lscsa_pages[idx], SPU_64K_PAGE_ORDER);
+	}
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+/* Without CONFIG_SPU_FS_64K_LS the save area always comes from vmalloc. */
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+ return spu_alloc_lscsa_std(csa);
+}
+
+/* Counterpart of the vmalloc-only spu_alloc_lscsa() above. */
+void spu_free_lscsa(struct spu_state *csa)
+{
+ spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/run.c b/kernel/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 000000000..4ddf769a6
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,454 @@
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/*
+ * Interrupt-level stop callback.
+ *
+ * Copies the exception state for interrupt class @irq from @spu into the
+ * bound context and wakes everybody sleeping on the context's stop queue.
+ */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	/*
+	 * It should be impossible to preempt a context while an exception
+	 * is being processed, since the context switch code is specially
+	 * coded to deal with interrupts ... But, just in case, sanity check
+	 * the context pointer.  It is OK to return doing nothing since
+	 * the exception will be regenerated when the context is resumed.
+	 */
+	if (!ctx)
+		return;
+
+	/* Copy exception arguments into module specific structure */
+	if (irq == 0) {
+		ctx->csa.class_0_pending = spu->class_0_pending;
+		ctx->csa.class_0_dar = spu->class_0_dar;
+	} else if (irq == 1) {
+		ctx->csa.class_1_dsisr = spu->class_1_dsisr;
+		ctx->csa.class_1_dar = spu->class_1_dar;
+	}
+	/* class 2 (and anything else) carries no arguments to copy */
+
+	/* ensure that the exception status has hit memory before a
+	 * thread waiting on the context's stop queue is woken */
+	smp_wmb();
+
+	wake_up_all(&ctx->stop_wq);
+}
+
+/*
+ * Wait condition used by spufs_run_spu().
+ *
+ * Stores the current SPU status in @stat and returns 1 when the waiter has
+ * something to do: the SPU stopped, a scheduler notification is pending, a
+ * class 1 fault (PTE not found / access denied) was recorded, or a class 0
+ * exception is pending. Returns 0 otherwise.
+ */
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+	const u32 stop_bits = SPU_STATUS_INVALID_INSTR |
+			      SPU_STATUS_SINGLE_STEP |
+			      SPU_STATUS_STOPPED_BY_HALT |
+			      SPU_STATUS_STOPPED_BY_STOP;
+	u64 fault_bits;
+
+	for (;;) {
+		*stat = ctx->ops->status_read(ctx);
+		if (!(*stat & stop_bits))
+			break;
+		/*
+		 * If the spu hasn't finished stopping, we need to
+		 * re-read the register to get the stopped value.
+		 */
+		if (!(*stat & SPU_STATUS_RUNNING))
+			return 1;
+	}
+
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
+
+	fault_bits = ctx->csa.class_1_dsisr;
+	if (fault_bits & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		return 1;
+
+	return ctx->csa.class_0_pending ? 1 : 0;
+}
+
+/*
+ * Start the isolated-mode loader on the SPU bound to @ctx.
+ *
+ * Sequence: unmap user mappings, purge the MFC DMA queue, clear the problem
+ * state bit in SR1, pass the loader address through the two signal
+ * registers, start the SPU in isolate mode and wait for the load to finish.
+ * Both waits give up after one second (HZ jiffies).
+ *
+ * Returns 0 on success, -ENODEV without a loader, -EIO on a timeout,
+ * -EACCES if the isolated load failed, or -EINVAL if the SPU is no longer
+ * in isolated state.
+ */
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+ int ret;
+ u64 __iomem *mfc_cntl;
+ u64 sr1;
+ u32 status;
+ unsigned long timeout;
+ /* Status bit combination that is present while the load is in progress. */
+ const u32 status_loading = SPU_STATUS_RUNNING
+ | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+ ret = -ENODEV;
+ if (!isolated_loader)
+ goto out;
+
+ /*
+ * We need to exclude userspace access to the context.
+ *
+ * To protect against memory access we invalidate all ptes
+ * and make sure the pagefault handlers block on the mutex.
+ */
+ spu_unmap_mappings(ctx);
+
+ mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+ /* purge the MFC DMA queue to ensure no spurious accesses before we
+ * enter kernel mode */
+ timeout = jiffies + HZ;
+ out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+ while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+ != MFC_CNTL_PURGE_DMA_COMPLETE) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+ __func__);
+ ret = -EIO;
+ /* SR1 has not been touched yet, so skip out_drop_priv. */
+ goto out;
+ }
+ cond_resched();
+ }
+
+ /* clear purge status */
+ out_be64(mfc_cntl, 0);
+
+ /* put the SPE in kernel mode to allow access to the loader */
+ sr1 = spu_mfc_sr1_get(ctx->spu);
+ sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+ /* start the loader */
+ /* High 32 bits of the loader address go to signal 1, low 32 bits to signal 2. */
+ ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+ ctx->ops->signal2_write(ctx,
+ (unsigned long)isolated_loader & 0xffffffff);
+
+ ctx->ops->runcntl_write(ctx,
+ SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+ ret = 0;
+ timeout = jiffies + HZ;
+ while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+ status_loading) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout waiting for loader\n",
+ __func__);
+ ret = -EIO;
+ goto out_drop_priv;
+ }
+ cond_resched();
+ }
+
+ if (!(status & SPU_STATUS_RUNNING)) {
+ /* If isolated LOAD has failed: run SPU, we will get a stop-and
+ * signal later. */
+ pr_debug("%s: isolated LOAD failed\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ ret = -EACCES;
+ goto out_drop_priv;
+ }
+
+ if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+ /* This isn't allowed by the CBEA, but check anyway */
+ pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+ ret = -EINVAL;
+ goto out_drop_priv;
+ }
+
+out_drop_priv:
+ /* Finished accessing the loader. Drop kernel mode */
+ sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+ return ret;
+}
+
+/*
+ * Prepare @ctx for a run: activate it where required, program the run
+ * control / privileged control / next program counter registers and mark
+ * the context as being inside spu_run.
+ *
+ * @npc: next program counter to write for non-isolated contexts
+ *
+ * Returns 0 on success or the error from spu_activate() /
+ * spu_setup_isolated().
+ */
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+ unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+ int ret;
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /*
+ * NOSCHED is synchronous scheduling with respect to the caller.
+ * The caller waits for the context to be loaded.
+ */
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /*
+ * Apply special setup as required.
+ */
+ if (ctx->flags & SPU_CREATE_ISOLATE) {
+ /* Only run the loader if the SPU is not in isolated state already. */
+ if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+ ret = spu_setup_isolated(ctx);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If userspace has set the runcntrl register (eg, to
+ * issue an isolated exit), we need to re-set it here
+ */
+ runcntl = ctx->ops->runcntl_read(ctx) &
+ (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+ if (runcntl == 0)
+ runcntl = SPU_RUNCNTL_RUNNABLE;
+ } else {
+ unsigned long privcntl;
+
+ /* Honour single-stepping requested for the calling thread. */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+ else
+ privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+ ctx->ops->privcntl_write(ctx, privcntl);
+ ctx->ops->npc_write(ctx, *npc);
+ }
+
+ ctx->ops->runcntl_write(ctx, runcntl);
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ } else {
+
+ /* Schedulable contexts are activated only after the registers are set. */
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ } else {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ }
+ }
+
+ set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+ return 0;
+}
+
+/*
+ * Finish a run: take the context off the run queue, report the final
+ * status and next program counter through @status / @npc, leave the
+ * "in spu_run" state and release the context.
+ *
+ * Returns -ERESTARTSYS if the caller has a signal pending, 0 otherwise.
+ */
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+			       u32 *status)
+{
+	spu_del_from_rq(ctx);
+
+	*status = ctx->ops->status_read(ctx);
+	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+	spu_release(ctx);
+
+	return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+/*
+ * SPU syscall restarting is tricky because we violate the basic
+ * assumption that the signal handler is running on the interrupted
+ * thread. Here instead, the handler runs on PowerPC user space code,
+ * while the syscall was called from the SPU.
+ * This means we can only do a very rough approximation of POSIX
+ * signal semantics.
+ *
+ * Translates the restart code in *@spu_ret. Returns -ERESTARTSYS when
+ * sys_spu_run itself has to be restarted, 0 for an unexpected code.
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+			  unsigned int *npc)
+{
+	long code = *spu_ret;
+
+	if (code == -ERESTARTSYS || code == -ERESTARTNOINTR) {
+		/*
+		 * Enter the regular syscall restarting for
+		 * sys_spu_run, then restart the SPU syscall
+		 * callback.
+		 */
+		*npc -= 8;
+		return -ERESTARTSYS;
+	}
+
+	if (code == -ERESTARTNOHAND || code == -ERESTART_RESTARTBLOCK) {
+		/*
+		 * Restart block is too hard for now, just return -EINTR
+		 * to the SPU.
+		 * ERESTARTNOHAND comes from sys_pause, we also return
+		 * -EINTR from there.
+		 * Assume that we need to be restarted ourselves though.
+		 */
+		*spu_ret = -EINTR;
+		return -ERESTARTSYS;
+	}
+
+	printk(KERN_WARNING "%s: unexpected return code %ld\n",
+		__func__, code);
+	return 0;
+}
+
+/*
+ * Service a syscall request issued by SPU code.
+ *
+ * The word at the SPU's next program counter holds the local store offset
+ * of a struct spu_syscall_block. The block is copied out, the syscall is
+ * executed with the context released, and the result is written back over
+ * the block before the SPU is restarted just past the pointer word.
+ *
+ * Returns 0 on success, -EFAULT if the block would not fit in local store,
+ * or -ERESTARTSYS when sys_spu_run has to be restarted (the state mutex is
+ * held again in that case as well).
+ */
+static int spu_process_callback(struct spu_context *ctx)
+{
+ struct spu_syscall_block s;
+ u32 ls_pointer, npc;
+ void __iomem *ls;
+ long spu_ret;
+ int ret;
+
+ /* get syscall block from local store */
+ npc = ctx->ops->npc_read(ctx) & ~3;
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+ ls_pointer = in_be32(ls + npc);
+ /* Reject a block that would extend past the end of local store. */
+ if (ls_pointer > (LS_SIZE - sizeof(s)))
+ return -EFAULT;
+ memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+ /* do actual syscall without pinning the spu */
+ ret = 0;
+ spu_ret = -ENOSYS;
+ /* Step over the 4-byte pointer word. */
+ npc += 4;
+
+ if (s.nr_ret < __NR_syscalls) {
+ spu_release(ctx);
+ /* do actual system call from here */
+ spu_ret = spu_sys_callback(&s);
+ if (spu_ret <= -ERESTARTSYS) {
+ ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+ }
+ /* Re-take the lock dropped by spu_release() above. */
+ mutex_lock(&ctx->state_mutex);
+ if (ret == -ERESTARTSYS)
+ return ret;
+ }
+
+ /* need to re-get the ls, as it may have changed when we released the
+ * spu */
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+ /* write result, jump over indirect pointer */
+ memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+ ctx->ops->npc_write(ctx, npc);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ return ret;
+}
+
+/*
+ * Run @ctx until it stops, faults or the caller is interrupted.
+ *
+ * @npc:   in: program counter to start at; out: program counter at exit
+ * @event: out: ctx->event_return as left by the run
+ *
+ * Returns the final SPU status word on a normal stop, or a negative errno
+ * (-ERESTARTSYS when interrupted, single-stepped or stopped with code
+ * 0x3fff, or an error from setup / fault handling).
+ */
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+ int ret;
+ struct spu *spu;
+ u32 status;
+
+ /* run_mutex is held for the whole call and released at out_unlock. */
+ if (mutex_lock_interruptible(&ctx->run_mutex))
+ return -ERESTARTSYS;
+
+ ctx->event_return = 0;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ goto out_unlock;
+
+ spu_enable_spu(ctx);
+
+ spu_update_sched_info(ctx);
+
+ ret = spu_run_init(ctx, npc);
+ if (ret) {
+ spu_release(ctx);
+ goto out;
+ }
+
+ do {
+ ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+ if (unlikely(ret)) {
+ /*
+ * This is nasty: we need the state_mutex for all the
+ * bookkeeping even if the syscall was interrupted by
+ * a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+ break;
+ }
+ spu = ctx->spu;
+ /* Woken by do_notify_spus_active(): notify and keep waiting unless stopped. */
+ if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags))) {
+ if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+ spu_switch_notify(spu, ctx);
+ continue;
+ }
+ }
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /* Stop code 0x2104 is handled in-kernel as an SPU syscall request. */
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+ ret = spu_process_callback(ctx);
+ if (ret)
+ break;
+ /* Callback handled: do not treat this stop as an exit condition. */
+ status &= ~SPU_STATUS_STOPPED_BY_STOP;
+ }
+ ret = spufs_handle_class1(ctx);
+ if (ret)
+ break;
+
+ ret = spufs_handle_class0(ctx);
+ if (ret)
+ break;
+
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+ } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_SINGLE_STEP)));
+
+ spu_disable_spu(ctx);
+ /* Refreshes status and *npc and releases the context. */
+ ret = spu_run_fini(ctx, npc, &status);
+ spu_yield(ctx);
+
+ /* Count stop codes of the form 0x21xx as library-assisted calls. */
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+ ctx->stats.libassist++;
+
+ /*
+ * Hand the status word back on success, and also when a signal is
+ * pending but the SPU has genuinely stopped (halt, single step, or a
+ * stop code other than the syscall callback).
+ */
+ if ((ret == 0) ||
+ ((ret == -ERESTARTSYS) &&
+ ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+ (status & SPU_STATUS_SINGLE_STEP) ||
+ ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ ret = status;
+
+ /* Note: we don't need to force_sig SIGTRAP on single-step
+ * since we have TIF_SINGLESTEP set, thus the kernel will do
+ * it upon return from the syscall anyway
+ */
+ if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+ ret = -ERESTARTSYS;
+
+ /* Stop code 0x3fff raises SIGTRAP in the calling thread. */
+ else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+ && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+ force_sig(SIGTRAP, current);
+ ret = -ERESTARTSYS;
+ }
+
+out:
+ *event = ctx->event_return;
+out_unlock:
+ mutex_unlock(&ctx->run_mutex);
+ return ret;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/sched.c b/kernel/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 000000000..998f632e7
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1172 @@
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31 NUMA domains added.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+/*
+ * Run queue state of the SPU scheduler: one list per priority level plus a
+ * bitmap with one bit per level, both sized by MAX_PRIO.
+ */
+struct spu_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_PRIO);
+ struct list_head runq[MAX_PRIO];
+ /* Taken by node_allowed() below; presumably guards the fields above - TODO confirm. */
+ spinlock_t runq_lock;
+ int nr_waiting;
+};
+
+/* Scheduler-wide state shared by the functions in this file. */
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-niced'd process (aka nice level 0).
+ */
+#define NORMAL_PRIO 120
+
+/*
+ * Frequency of the spu scheduler tick. By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK (10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ *
+ * Both values are expressed in SPU scheduler ticks.
+ */
+#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))
+
+/*
+ * Scale timeslice @x by priority @prio, never going below
+ * MIN_SPU_TIMESLICE. The arguments are not parenthesised in the expansion,
+ * so callers must pass expressions that are safe to splice in as-is.
+ */
+#define SCALE_PRIO(x, prio) \
+ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values:
+ * [800ms ... 100ms ... 5ms]
+ *
+ * The higher a thread's priority, the bigger timeslices
+ * it gets during one round of execution. But even the lowest
+ * priority thread gets MIN_SPU_TIMESLICE worth of execution time.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+	int base_slice = DEF_SPU_TIMESLICE;
+
+	/* Contexts above normal priority scale from a four times larger base. */
+	if (ctx->prio < NORMAL_PRIO)
+		base_slice *= 4;
+
+	ctx->time_slice = SCALE_PRIO(base_slice, ctx->prio);
+}
+
+/*
+ * Refresh @ctx's scheduling parameters (tid, priority, policy, allowed
+ * cpus, last cpu) from the calling thread.
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+	/*
+	 * The parameters may only change while the context is off the
+	 * runqueue.
+	 */
+	BUG_ON(!list_empty(&ctx->rq));
+
+	/*
+	 * 32-Bit assignments are atomic on powerpc, and we don't care about
+	 * memory ordering here because retrieving the controlling thread is
+	 * per definition racy.
+	 */
+	ctx->tid = current->pid;
+
+	/*
+	 * We do our own priority calculations, so ->static_prio is the
+	 * normal starting point. It contains junk for threads with a
+	 * realtime policy, so those use ->prio instead.
+	 */
+	ctx->prio = rt_prio(current->prio) ? current->prio
+					   : current->static_prio;
+	ctx->policy = current->policy;
+
+	/*
+	 * TO DO: the context may be loaded, so we may need to activate
+	 * it again on a different node. But it shouldn't hurt anything
+	 * to update its parameters, because we know that the scheduler
+	 * is not actively looking at this field, since it is not on the
+	 * runqueue. The context will be rescheduled on the proper node
+	 * if it is timesliced or preempted.
+	 */
+	cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
+
+	/* Save the current cpu id for spu interrupt routing. */
+	ctx->last_ran = raw_smp_processor_id();
+}
+
+/*
+ * Locked wrapper around __spu_update_sched_info(): a runnable context is
+ * updated under its node's list_mutex, any other context without a lock.
+ */
+void spu_update_sched_info(struct spu_context *ctx)
+{
+	int node;
+
+	if (ctx->state != SPU_STATE_RUNNABLE) {
+		__spu_update_sched_info(ctx);
+		return;
+	}
+
+	node = ctx->spu->node;
+
+	/*
+	 * Take list_mutex to sync with find_victim().
+	 */
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	__spu_update_sched_info(ctx);
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/*
+ * Returns 1 if @node has cpus and at least one of them is in the context's
+ * allowed cpu mask, 0 otherwise.
+ */
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+	if (!nr_cpus_node(node))
+		return 0;
+
+	return cpumask_intersects(cpumask_of_node(node),
+				  &ctx->cpus_allowed) ? 1 : 0;
+}
+
+/* Same check as __node_allowed(), performed under the runqueue lock. */
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int allowed;
+
+	spin_lock(&spu_prio->runq_lock);
+	allowed = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return allowed;
+}
+
+/*
+ * Flag every context bound to a non-free SPU with SPU_SCHED_NOTIFY_ACTIVE
+ * and wake its waiters.
+ */
+void do_notify_spus_active(void)
+{
+	int node;
+
+	/*
+	 * Wake up the active spu_contexts.
+	 *
+	 * When the awakened processes see their "notify_active" flag is set,
+	 * they will call spu_switch_notify().
+	 */
+	for_each_online_node(node) {
+		struct spu *spu;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *ctx;
+
+			if (spu->alloc_state == SPU_FREE)
+				continue;
+
+			ctx = spu->ctx;
+			set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
+			/* Make the flag visible before the waiters run. */
+			mb();
+			wake_up_all(&ctx->stop_wq);
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu: physical spu to bind to
+ * @ctx: context to bind
+ *
+ * Cross-links @spu and @ctx, switches the context to the hardware ops,
+ * installs the spufs interrupt callbacks, restores the saved state onto
+ * the SPU and marks the context runnable.
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+ spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /* A NOSCHED context counts against the node's reserved_spus. */
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+ /* Snapshot the SPU's counters as the baseline for this context's stats. */
+ ctx->stats.slb_flt_base = spu->stats.slb_flt;
+ ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+ spu_associate_mm(spu, ctx->owner);
+
+ /* Update the links and callbacks under register_lock with irqs off. */
+ spin_lock_irq(&spu->register_lock);
+ spu->ctx = ctx;
+ spu->flags = 0;
+ ctx->spu = spu;
+ ctx->ops = &spu_hw_ops;
+ spu->pid = current->pid;
+ spu->tgid = current->tgid;
+ spu->ibox_callback = spufs_ibox_callback;
+ spu->wbox_callback = spufs_wbox_callback;
+ spu->stop_callback = spufs_stop_callback;
+ spu->mfc_callback = spufs_mfc_callback;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_unmap_mappings(ctx);
+
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+ spu_restore(&ctx->csa, spu);
+ spu->timestamp = jiffies;
+ spu_switch_notify(spu, ctx);
+ ctx->state = SPU_STATE_RUNNABLE;
+
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Returns nonzero if @spu takes part in scheduling, i.e. it has no context
+ * bound or the bound context is not SPU_CREATE_NOSCHED.
+ *
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+	if (!spu->ctx)
+		return 1;
+
+	return !(spu->ctx->flags & SPU_CREATE_NOSCHED);
+}
+
+/*
+ * Add contexts that are not yet on the gang's affinity list to it and mark
+ * the gang as AFF_MERGED.
+ *
+ * NOTE(review): the loop walks gang->aff_list_head through the aff_list
+ * link and then tests list_empty() on that same link. An entry reached
+ * this way is linked into the list, so the test looks like it can never be
+ * true and the list_add() never runs. The intent was presumably to walk
+ * all contexts of the gang instead - confirm against struct spu_gang.
+ */
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+
+ list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+ if (list_empty(&ctx->aff_list))
+ list_add(&ctx->aff_list, &gang->aff_list_head);
+ }
+ gang->aff_flags |= AFF_MERGED;
+}
+
+ /*
+ * Assign each context on the gang's affinity list its offset relative to
+ * the reference context (gang->aff_ref_ctx): the reference context gets 0,
+ * contexts after it get 1, 2, ..., contexts before it get -1, -2, ...
+ * Sets AFF_OFFSETS_SET when done.
+ */
+ static void aff_set_offsets(struct spu_gang *gang)
+ {
+ struct spu_context *ctx;
+ int offset;
+
+ /* Walk backwards from the reference ctx until the list head. */
+ offset = -1;
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset--;
+ }
+
+ /*
+ * Walk forwards using the reference ctx's predecessor as the iteration
+ * head, so the first entry visited is the reference ctx itself.
+ */
+ offset = 0;
+ list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset++;
+ }
+
+ gang->aff_flags |= AFF_OFFSETS_SET;
+ }
+
+ /*
+ * aff_ref_location - pick the physical spu used as reference location
+ * @ctx: reference context of the affinity gang
+ * @mem_aff: if non-zero, only spus with has_mem_affinity set qualify
+ * @group_size: not used by the current implementation
+ * @lowest_offset: not used by the current implementation
+ *
+ * Nodes are tried starting with the node of the current CPU and wrapping
+ * around. Returns the first spu on an allowed node with enough available
+ * spus that passes sched_spu(), or NULL if no node qualifies.
+ */
+ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+ int group_size, int lowest_offset)
+ {
+ struct spu *spu;
+ int node, n;
+
+ /*
+ * TODO: A better algorithm could be used to find a good spu to be
+ * used as reference location for the ctxs chain.
+ */
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /*
+ * "available_spus" counts how many spus are not potentially
+ * going to be used by other affinity gangs whose reference
+ * context is already in place. Although this code seeks to
+ * avoid having affinity gangs with a summed amount of
+ * contexts bigger than the amount of spus in the node,
+ * this may happen sporadically. In this case, available_spus
+ * becomes negative, which is harmless.
+ */
+ int available_spus;
+
+ /* Wrap around to node 0 after the last node. */
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ available_spus = 0;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ /*
+ * An spu running the offset-0 context of a gang that has a
+ * reference spu subtracts that gang's context count.
+ */
+ if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+ && spu->ctx->gang->aff_ref_spu)
+ available_spus -= spu->ctx->gang->contexts;
+ available_spus++;
+ }
+ if (available_spus < ctx->gang->contexts) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if ((!mem_aff || spu->has_mem_affinity) &&
+ sched_spu(spu)) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ return spu;
+ }
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ return NULL;
+ }
+
+ /*
+ * aff_set_ref_point_location - choose and record the gang's reference spu
+ * @gang: affinity gang whose offsets have already been assigned
+ *
+ * Counts the contexts on the gang's affinity list, determines the lowest
+ * offset found before the reference context, and stores the spu returned
+ * by aff_ref_location() in gang->aff_ref_spu (NULL if none was found).
+ */
+ static void aff_set_ref_point_location(struct spu_gang *gang)
+ {
+ int mem_aff, gs, lowest_offset;
+ struct spu_context *tmp, *ctx;
+
+ mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+ lowest_offset = 0;
+ gs = 0;
+
+ /*
+ * gang->aff_list_head links spu_context entries through aff_list, so
+ * the cursor must be a struct spu_context pointer, not a struct spu.
+ */
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ gs++;
+
+ /* The last ctx visited before the list head carries the lowest offset. */
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ lowest_offset = ctx->aff_offset;
+ }
+
+ gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+ lowest_offset);
+ }
+
+ /*
+ * ctx_location - find the spu lying @offset schedulable spus from @ref
+ * @ref: reference spu of the gang
+ * @offset: signed distance along the spu affinity list (0 means @ref)
+ * @node: node all visited spus must belong to (enforced with BUG_ON)
+ *
+ * Walks the spu aff_list forwards for non-negative offsets and backwards
+ * for negative ones; only spus accepted by sched_spu() count as a step.
+ * sched_spu() requires the node's list_mutex to be held.
+ *
+ * NOTE(review): if a walk runs to completion without offset reaching 0,
+ * the returned pointer is whatever the list macro left in the cursor, not
+ * NULL - confirm that callers cannot hit this case.
+ */
+ static struct spu *ctx_location(struct spu *ref, int offset, int node)
+ {
+ struct spu *spu;
+
+ spu = NULL;
+ if (offset >= 0) {
+ /* Iteration head is ref's predecessor, so ref is visited first. */
+ list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset--;
+ }
+ } else {
+ /* Iteration head is ref's successor, so ref is visited first. */
+ list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset++;
+ }
+ }
+
+ return spu;
+ }
+
+/*
+ * affinity_check is called each time a context is going to be scheduled.
+ * It returns the spu ptr on which the context must run.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+ struct spu_gang *gang = ctx->gang;
+
+ if (list_empty(&ctx->aff_list))
+ return 0;
+
+ if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+ ctx->gang->aff_ref_spu = NULL;
+
+ if (!gang->aff_ref_spu) {
+ if (!(gang->aff_flags & AFF_MERGED))
+ aff_merge_remaining_ctxs(gang);
+ if (!(gang->aff_flags & AFF_OFFSETS_SET))
+ aff_set_offsets(gang);
+ aff_set_ref_point_location(gang);
+ }
+
+ return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu: physical spu to unbind from
+ * @ctx: context to unbind
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+ u32 status;
+
+ spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+ if (ctx->gang)
+ /*
+ * If ctx->gang->aff_sched_count is positive, SPU affinity is
+ * being considered in this gang. Using atomic_dec_if_positive
+ * allow us to skip an explicit check for affinity in this gang
+ */
+ atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+ spu_switch_notify(spu, NULL);
+ spu_unmap_mappings(ctx);
+ spu_save(&ctx->csa, spu);
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+ spin_lock_irq(&spu->register_lock);
+ spu->timestamp = jiffies;
+ ctx->state = SPU_STATE_SAVED;
+ spu->ibox_callback = NULL;
+ spu->wbox_callback = NULL;
+ spu->stop_callback = NULL;
+ spu->mfc_callback = NULL;
+ spu->pid = 0;
+ spu->tgid = 0;
+ ctx->ops = &spu_backing_ops;
+ spu->flags = 0;
+ spu->ctx = NULL;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_associate_mm(spu, NULL);
+
+ ctx->stats.slb_flt +=
+ (spu->stats.slb_flt - ctx->stats.slb_flt_base);
+ ctx->stats.class2_intr +=
+ (spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+ /* This maps the underlying spu state to idle */
+ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+ ctx->spu = NULL;
+
+ if (spu_stopped(ctx, &status))
+ wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * spu_add_to_rq - add a context to the runqueue
+ * @ctx: context to add
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+ /*
+ * Unfortunately this code path can be called from multiple threads
+ * on behalf of a single context due to the way the problem state
+ * mmap support works.
+ *
+ * Fortunately we need to wake up all these threads at the same time
+ * and can simply skip the runqueue addition for every but the first
+ * thread getting into this codepath.
+ *
+ * It's still quite hacky, and long-term we should proxy all other
+ * threads through the owner thread so that spu_run is in control
+ * of all the scheduling activity for a given context.
+ */
+ if (list_empty(&ctx->rq)) {
+ list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+ set_bit(ctx->prio, spu_prio->bitmap);
+ if (!spu_prio->nr_waiting++)
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ }
+}
+
+ /* Locked wrapper: queue @ctx on the runqueue while holding runq_lock. */
+ static void spu_add_to_rq(struct spu_context *ctx)
+ {
+ spin_lock(&spu_prio->runq_lock);
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+ }
+
+ /*
+ * Take @ctx off the runqueue if it is queued. Stops the scheduler tick
+ * timer when the last waiter leaves and clears the priority's bitmap bit
+ * once its queue has become empty. Callers in this file hold runq_lock.
+ */
+ static void __spu_del_from_rq(struct spu_context *ctx)
+ {
+ int level = ctx->prio;
+
+ /* Not queued: nothing to undo. */
+ if (list_empty(&ctx->rq))
+ return;
+
+ spu_prio->nr_waiting--;
+ if (spu_prio->nr_waiting == 0)
+ del_timer(&spusched_timer);
+
+ list_del_init(&ctx->rq);
+
+ if (list_empty(&spu_prio->runq[level]))
+ clear_bit(level, spu_prio->bitmap);
+ }
+
+ /* Locked wrapper: remove @ctx from the runqueue while holding runq_lock. */
+ void spu_del_from_rq(struct spu_context *ctx)
+ {
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+ }
+
+ /*
+ * spu_prio_wait - queue a NOSCHED context and sleep until woken
+ * @ctx: context to wait for; must have SPU_CREATE_NOSCHED set
+ *
+ * Entered with ctx->state_mutex held; the mutex is dropped across the
+ * sleep and re-taken before returning. If a signal is already pending the
+ * context is not queued and the function returns without sleeping.
+ */
+ static void spu_prio_wait(struct spu_context *ctx)
+ {
+ DEFINE_WAIT(wait);
+
+ /*
+ * The caller must explicitly wait for a context to be loaded
+ * if the nosched flag is set. If NOSCHED is not set, the caller
+ * queues the context and waits for an spu event or error.
+ */
+ BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+ spin_lock(&spu_prio->runq_lock);
+ prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+ if (!signal_pending(current)) {
+ __spu_add_to_rq(ctx);
+ /* Drop both locks before sleeping, re-take them in the same order. */
+ spin_unlock(&spu_prio->runq_lock);
+ mutex_unlock(&ctx->state_mutex);
+ schedule();
+ mutex_lock(&ctx->state_mutex);
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ }
+ spin_unlock(&spu_prio->runq_lock);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&ctx->stop_wq, &wait);
+ }
+
+ /*
+ * spu_get_idle - claim a free physical spu for @ctx
+ * @ctx: context that needs an spu
+ *
+ * For a gang context with affinity, only the spu at the context's offset
+ * from the gang's reference spu is considered. Otherwise the allowed nodes
+ * are scanned, starting with the node of the current CPU, for the first
+ * spu in state SPU_FREE.
+ *
+ * Returns the spu, marked SPU_USED and with its channels initialised, or
+ * NULL if none is available.
+ */
+ static struct spu *spu_get_idle(struct spu_context *ctx)
+ {
+ struct spu *spu, *aff_ref_spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+ if (ctx->gang) {
+ mutex_lock(&ctx->gang->aff_mutex);
+ if (has_affinity(ctx)) {
+ aff_ref_spu = ctx->gang->aff_ref_spu;
+ /* Count this placement attempt before dropping aff_mutex. */
+ atomic_inc(&ctx->gang->aff_sched_count);
+ mutex_unlock(&ctx->gang->aff_mutex);
+ node = aff_ref_spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+ if (spu && spu->alloc_state == SPU_FREE)
+ goto found;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ /* Affinity slot is busy: undo the count, no fallback search. */
+ atomic_dec(&ctx->gang->aff_sched_count);
+ goto not_found;
+ }
+ mutex_unlock(&ctx->gang->aff_mutex);
+ }
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /* Wrap around to node 0 after the last node. */
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state == SPU_FREE)
+ goto found;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+
+ not_found:
+ spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+ return NULL;
+
+ found:
+ /* Reached with cbe_spu_info[node].list_mutex still held. */
+ spu->alloc_state = SPU_USED;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ spu_context_trace(spu_get_idle__found, ctx, spu);
+ spu_init_channels(spu);
+ return spu;
+ }
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx: canidate context for running
+ *
+ * Returns the freed physical spu to run the new context on.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+ struct spu_context *victim = NULL;
+ struct spu *spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+ /*
+ * Look for a possible preemption candidate on the local node first.
+ * If there is no candidate look at the other nodes. This isn't
+ * exactly fair, but so far the whole spu scheduler tries to keep
+ * a strong node affinity. We might want to fine-tune this in
+ * the future.
+ */
+ restart:
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ struct spu_context *tmp = spu->ctx;
+
+ if (tmp && tmp->prio > ctx->prio &&
+ !(tmp->flags & SPU_CREATE_NOSCHED) &&
+ (!victim || tmp->prio > victim->prio)) {
+ victim = spu->ctx;
+ }
+ }
+ if (victim)
+ get_spu_context(victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (victim) {
+ /*
+ * This nests ctx->state_mutex, but we always lock
+ * higher priority contexts before lower priority
+ * ones, so this is safe until we introduce
+ * priority inheritance schemes.
+ *
+ * XXX if the highest priority context is locked,
+ * this can loop a long time. Might be better to
+ * look at another context or give up after X retries.
+ */
+ if (!mutex_trylock(&victim->state_mutex)) {
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu = victim->spu;
+ if (!spu || victim->prio <= ctx->prio) {
+ /*
+ * This race can happen because we've dropped
+ * the active list mutex. Not a problem, just
+ * restart the search.
+ */
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ spu_unbind_context(spu, victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ victim->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+ spu_add_to_rq(victim);
+
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+
+ return spu;
+ }
+ }
+
+ return NULL;
+}
+
+ /*
+ * __spu_schedule - bind @ctx to @spu if the spu is still unbound
+ * @spu: physical spu chosen for @ctx
+ * @ctx: context to load
+ *
+ * On success the waiters on ctx->run_wq are woken. If another context got
+ * bound to @spu in the meantime, @ctx is put on the runqueue instead.
+ */
+ static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+ {
+ int node = spu->node;
+ int success = 0;
+
+ spu_set_timeslice(ctx);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ if (spu->ctx == NULL) {
+ spu_bind_context(spu, ctx);
+ cbe_spu_info[node].nr_active++;
+ spu->alloc_state = SPU_USED;
+ success = 1;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (success)
+ wake_up_all(&ctx->run_wq);
+ else
+ spu_add_to_rq(ctx);
+ }
+
+ /*
+ * spu_schedule - load @ctx onto @spu if the context is still in saved state
+ *
+ * Takes ctx->state_mutex itself and hands it back through spu_release()
+ * (presumably the matching unlock - defined outside this chunk).
+ */
+ static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+ {
+ /* not a candidate for interruptible because it's called either
+ from the scheduler thread or from spu_deactivate */
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->state == SPU_STATE_SAVED)
+ __spu_schedule(spu, ctx);
+ spu_release(ctx);
+ }
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu: The SPU to unschedule from
+ * @ctx: The context currently scheduled on the SPU
+ * @free_spu Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+ int free_spu)
+{
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ if (free_spu)
+ spu->alloc_state = SPU_FREE;
+ spu_unbind_context(spu, ctx);
+ ctx->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx: spu context to schedule
+ * @flags: flags (currently ignored)
+ *
+ * Tries to find a free spu to run @ctx. If no free spu is available
+ * add the context to the runqueue so it gets woken up once an spu
+ * is available.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+ struct spu *spu;
+
+ /*
+ * If there are multiple threads waiting for a single context
+ * only one actually binds the context while the others will
+ * only be able to acquire the state_mutex once the context
+ * already is in runnable state.
+ */
+ if (ctx->spu)
+ return 0;
+
+spu_activate_top:
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ spu = spu_get_idle(ctx);
+ /*
+ * If this is a realtime thread we try to get it running by
+ * preempting a lower priority thread.
+ */
+ if (!spu && rt_prio(ctx->prio))
+ spu = find_victim(ctx);
+ if (spu) {
+ unsigned long runcntl;
+
+ runcntl = ctx->ops->runcntl_read(ctx);
+ __spu_schedule(spu, ctx);
+ if (runcntl & SPU_RUNCNTL_RUNNABLE)
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+ return 0;
+ }
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spu_prio_wait(ctx);
+ goto spu_activate_top;
+ }
+
+ spu_add_to_rq(ctx);
+
+ return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller. Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+ struct spu_context *ctx;
+ int best;
+
+ spin_lock(&spu_prio->runq_lock);
+ best = find_first_bit(spu_prio->bitmap, prio);
+ while (best < prio) {
+ struct list_head *rq = &spu_prio->runq[best];
+
+ list_for_each_entry(ctx, rq, rq) {
+ /* XXX(hch): check for affinity here as well */
+ if (__node_allowed(ctx, node)) {
+ __spu_del_from_rq(ctx);
+ goto found;
+ }
+ }
+ best++;
+ }
+ ctx = NULL;
+ found:
+ spin_unlock(&spu_prio->runq_lock);
+ return ctx;
+}
+
+ /*
+ * __spu_deactivate - take @ctx off its spu and hand the spu on
+ * @ctx: context to unload
+ * @force: unload even if no other context is waiting
+ * @max_prio: passed to grab_runnable_context() as the priority bound
+ *
+ * Does nothing if @ctx is not loaded. If a waiting context is found it is
+ * either woken (NOSCHED) or scheduled onto the freed spu; in the latter
+ * case ctx->state_mutex is dropped around spu_schedule() and re-taken.
+ *
+ * Returns non-zero if a waiting context was taken from the runqueue.
+ */
+ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+ {
+ struct spu *spu = ctx->spu;
+ struct spu_context *new = NULL;
+
+ if (spu) {
+ new = grab_runnable_context(max_prio, spu->node);
+ if (new || force) {
+ /* The spu is marked free only when nobody takes it over. */
+ spu_unschedule(spu, ctx, new == NULL);
+ if (new) {
+ if (new->flags & SPU_CREATE_NOSCHED)
+ wake_up(&new->stop_wq);
+ else {
+ spu_release(ctx);
+ spu_schedule(spu, new);
+ /* this one can't easily be made
+ interruptible */
+ mutex_lock(&ctx->state_mutex);
+ }
+ }
+ }
+ }
+
+ return new != NULL;
+ }
+
+/**
+ * spu_deactivate - unbind a context from it's physical spu
+ * @ctx: spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_deactivate__enter, ctx);
+ __spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield - yield a physical spu if others are waiting
+ * @ctx: spu context to yield
+ *
+ * Check if there is a higher priority context waiting and if yes
+ * unbind @ctx from the physical spu and schedule the highest
+ * priority context to run on the freed physical spu instead.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_yield__enter, ctx);
+ if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
+ mutex_lock(&ctx->state_mutex);
+ __spu_deactivate(ctx, 0, MAX_PRIO);
+ mutex_unlock(&ctx->state_mutex);
+ }
+}
+
+ /*
+ * spusched_tick - per-context scheduler tick
+ * @ctx: context currently bound to an spu
+ *
+ * Skips contexts that are not runnable, are NOSCHED or use SCHED_FIFO.
+ * Otherwise consumes one tick of the time slice; once the slice is used up
+ * (or the context is not in spu_run) a waiting context from a runqueue
+ * index up to and including ctx->prio replaces @ctx on its spu.
+ */
+ static noinline void spusched_tick(struct spu_context *ctx)
+ {
+ struct spu_context *new = NULL;
+ struct spu *spu = NULL;
+
+ if (spu_acquire(ctx))
+ BUG(); /* a kernel thread never has signals pending */
+
+ if (ctx->state != SPU_STATE_RUNNABLE)
+ goto out;
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ goto out;
+ if (ctx->policy == SCHED_FIFO)
+ goto out;
+
+ if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ goto out;
+
+ spu = ctx->spu;
+
+ spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+ new = grab_runnable_context(ctx->prio + 1, spu->node);
+ if (new) {
+ /* Keep the spu allocated: it is handed to 'new' below. */
+ spu_unschedule(spu, ctx, 0);
+ if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ spu_add_to_rq(ctx);
+ } else {
+ spu_context_nospu_trace(spusched_tick__newslice, ctx);
+ /* Nobody is waiting: grant one more tick if the slice hit zero. */
+ if (!ctx->time_slice)
+ ctx->time_slice++;
+ }
+ out:
+ spu_release(ctx);
+
+ /* Schedule the replacement only after ctx has been released. */
+ if (new)
+ spu_schedule(spu, new);
+ }
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Return the number of tasks currently running or waiting to run.
+ *
+ * Note that we don't take runq_lock / list_mutex here. Reading
+ * a single 32bit value is atomic on powerpc, and we don't care
+ * about memory ordering issues here.
+ */
+static unsigned long count_active_contexts(void)
+{
+ int nr_active = 0, node;
+
+ for (node = 0; node < MAX_NUMNODES; node++)
+ nr_active += cbe_spu_info[node].nr_active;
+ nr_active += spu_prio->nr_waiting;
+
+ return nr_active;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * No locking against reading these values from userspace, as for
+ * the CPU loadavg code.
+ */
+static void spu_calc_load(void)
+{
+ unsigned long active_tasks; /* fixed-point */
+
+ active_tasks = count_active_contexts() * FIXED_1;
+ CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+}
+
+ /*
+ * Timer handler for spusched_timer: re-arms the timer one SPUSCHED_TICK
+ * ahead and wakes the scheduler thread. @data is unused.
+ */
+ static void spusched_wake(unsigned long data)
+ {
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ wake_up_process(spusched_task);
+ }
+
+ /*
+ * Timer handler for spuloadavg_timer: re-arms the timer LOAD_FREQ ahead
+ * and recomputes the load averages. @data is unused.
+ */
+ static void spuloadavg_wake(unsigned long data)
+ {
+ mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+ spu_calc_load();
+ }
+
+ /*
+ * spusched_thread - body of the "spusched" kernel thread
+ *
+ * Sleeps until woken (spusched_wake() does so from the tick timer), then
+ * runs spusched_tick() for every context bound to an spu on every node.
+ * Returns 0 once kthread_should_stop() is true.
+ */
+ static int spusched_thread(void *unused)
+ {
+ struct spu *spu;
+ int node;
+
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+ mutex_lock(mtx);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus,
+ cbe_list) {
+ struct spu_context *ctx = spu->ctx;
+
+ if (ctx) {
+ /*
+ * Hold a reference on ctx while list_mutex is
+ * dropped around the tick.
+ */
+ get_spu_context(ctx);
+ mutex_unlock(mtx);
+ spusched_tick(ctx);
+ mutex_lock(mtx);
+ put_spu_context(ctx);
+ }
+ }
+ mutex_unlock(mtx);
+ }
+ }
+
+ return 0;
+ }
+
+ /*
+ * spuctx_switch_state - record a utilization state change for @ctx
+ * @ctx: context changing state; ctx->state_mutex is expected to be held
+ * (a WARN_ON fires otherwise)
+ * @new_state: utilization state being entered
+ *
+ * Always updates the context's current state and timestamp. When the
+ * context is loaded on an spu, the time spent in the old state is added to
+ * both the context and spu statistics, and the node's busy_spus count is
+ * adjusted on transitions out of or into SPU_UTIL_USER.
+ */
+ void spuctx_switch_state(struct spu_context *ctx,
+ enum spu_utilization_state new_state)
+ {
+ unsigned long long curtime;
+ signed long long delta;
+ struct spu *spu;
+ enum spu_utilization_state old_state;
+ int node;
+
+ curtime = ktime_get_ns();
+ delta = curtime - ctx->stats.tstamp;
+
+ WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+ WARN_ON(delta < 0);
+
+ spu = ctx->spu;
+ old_state = ctx->stats.util_state;
+ ctx->stats.util_state = new_state;
+ ctx->stats.tstamp = curtime;
+
+ /*
+ * Update the physical SPU utilization statistics.
+ */
+ if (spu) {
+ ctx->stats.times[old_state] += delta;
+ spu->stats.times[old_state] += delta;
+ spu->stats.util_state = new_state;
+ spu->stats.tstamp = curtime;
+ node = spu->node;
+ if (old_state == SPU_UTIL_USER)
+ atomic_dec(&cbe_spu_info[node].busy_spus);
+ if (new_state == SPU_UTIL_USER)
+ atomic_inc(&cbe_spu_info[node].busy_spus);
+ }
+ }
+
+ /* Integer part and two-digit fractional part of a fixed-point load value. */
+ #define LOAD_INT(x) ((x) >> FSHIFT)
+ #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+ /*
+ * seq_file show handler for the spu_loadavg proc entry: prints the three
+ * load averages, active/total context counts and the last pid of the
+ * caller's pid namespace on a single line. Always returns 0.
+ */
+ static int show_spu_loadavg(struct seq_file *s, void *private)
+ {
+ int a, b, c;
+
+ /* FIXED_1/200 is 0.005 in fixed-point: round the two-digit fraction. */
+ a = spu_avenrun[0] + (FIXED_1/200);
+ b = spu_avenrun[1] + (FIXED_1/200);
+ c = spu_avenrun[2] + (FIXED_1/200);
+
+ /*
+ * Note that last_pid doesn't really make much sense for the
+ * SPU loadavg (it even seems very odd on the CPU side...),
+ * but we include it here to have a 100% compatible interface.
+ */
+ seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+ LOAD_INT(a), LOAD_FRAC(a),
+ LOAD_INT(b), LOAD_FRAC(b),
+ LOAD_INT(c), LOAD_FRAC(c),
+ count_active_contexts(),
+ atomic_read(&nr_spu_contexts),
+ task_active_pid_ns(current)->last_pid);
+ return 0;
+ }
+
+ /* open handler: binds show_spu_loadavg() as a single-record seq_file. */
+ static int spu_loadavg_open(struct inode *inode, struct file *file)
+ {
+ return single_open(file, show_spu_loadavg, NULL);
+ }
+
+ /* File operations of the spu_loadavg proc entry created in spu_sched_init(). */
+ static const struct file_operations spu_loadavg_fops = {
+ .open = spu_loadavg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ /*
+ * spu_sched_init - set up the spu scheduler
+ *
+ * Allocates and initialises the priority array, sets up the tick and
+ * load-average timers, starts the "spusched" kernel thread, arms the
+ * load-average timer and creates the spu_loadavg proc entry.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation or the proc entry
+ * creation fails, or the error from kthread_run().
+ */
+ int __init spu_sched_init(void)
+ {
+ struct proc_dir_entry *entry;
+ int err = -ENOMEM, i;
+
+ spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+ if (!spu_prio)
+ goto out;
+
+ for (i = 0; i < MAX_PRIO; i++) {
+ INIT_LIST_HEAD(&spu_prio->runq[i]);
+ __clear_bit(i, spu_prio->bitmap);
+ }
+ spin_lock_init(&spu_prio->runq_lock);
+
+ setup_timer(&spusched_timer, spusched_wake, 0);
+ setup_timer(&spuloadavg_timer, spuloadavg_wake, 0);
+
+ spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+ if (IS_ERR(spusched_task)) {
+ err = PTR_ERR(spusched_task);
+ goto out_free_spu_prio;
+ }
+
+ mod_timer(&spuloadavg_timer, 0);
+
+ entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+ if (!entry)
+ goto out_stop_kthread;
+
+ pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+ SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+ return 0;
+
+ out_stop_kthread:
+ /*
+ * spuloadavg_timer is armed at this point and its handler reads
+ * spu_prio through count_active_contexts(); stop it before spu_prio
+ * is freed below.
+ */
+ del_timer_sync(&spuloadavg_timer);
+ kthread_stop(spusched_task);
+ out_free_spu_prio:
+ kfree(spu_prio);
+ out:
+ return err;
+ }
+
+ /*
+ * spu_sched_exit - tear down the spu scheduler
+ *
+ * Removes the proc entry, stops both timers and the scheduler thread,
+ * marks every spu on every node as SPU_FREE and frees the priority array.
+ */
+ void spu_sched_exit(void)
+ {
+ struct spu *spu;
+ int node;
+
+ remove_proc_entry("spu_loadavg", NULL);
+
+ /* Timers first: their handlers wake the thread and read spu_prio. */
+ del_timer_sync(&spusched_timer);
+ del_timer_sync(&spuloadavg_timer);
+ kthread_stop(spusched_task);
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+ if (spu->alloc_state != SPU_FREE)
+ spu->alloc_state = SPU_FREE;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ kfree(spu_prio);
+ }
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 000000000..72c905f1e
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,336 @@
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE 0x40000 /* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+ /*
+ * SPU instruction encodings that restore_complete() writes over the code
+ * at exit_fini to reproduce the saved stopped state.
+ */
+ #define BR_INSTR 0x327fff80 /* br -4 */
+ #define NOP_INSTR 0x40200000 /* nop */
+ #define HEQ_INSTR 0x7b000000 /* heq $0, $0 */
+ #define STOP_INSTR 0x00000000 /* stop 0x0 */
+ #define ILLEGAL_INSTR 0x00800000 /* illegal instr */
+ #define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */
+
+ /*
+ * Enqueue a GET (tag 0) that pulls sizeof(regs_spill) bytes from the
+ * effective address @lscsa_ea into the local regs_spill area. The
+ * function only programs the MFC channels; it does not wait for the
+ * transfer here.
+ */
+ static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+ {
+ unsigned int ls = (unsigned int)&regs_spill[0];
+ unsigned int size = sizeof(regs_spill);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x40; /* GET */
+
+ /* ui[0] is used as the high word of the EA, ui[1] as the low word. */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+ }
+
+ /*
+ * Enqueue a list GET (GETL, tag 0) driven by dma_list that fills local
+ * store starting at offset 16384 (16 KB). For this command MFC_EAL
+ * carries the local-store address of the list and MFC_Size the size of
+ * the list itself.
+ */
+ static inline void restore_upper_240kb(addr64 lscsa_ea)
+ {
+ unsigned int ls = 16384;
+ unsigned int list = (unsigned int)&dma_list[0];
+ unsigned int size = sizeof(dma_list);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x44; /* GETL */
+
+ /* Restore, Step 4:
+ * Enqueue the GETL command (tag 0) to the MFC SPU command
+ * queue to transfer the upper 240 kb of LS from CSA.
+ */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, list);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+ }
+
+ /*
+ * Restore, Step 6 (moved):
+ * Reload the decrementer from the LSCSA copy, but only when the saved
+ * decr_status has SPU_DECR_STATUS_RUNNING set.
+ */
+ static inline void restore_decr(void)
+ {
+ unsigned int qw;
+ unsigned int saved_decr;
+
+ qw = LSCSA_QW_OFFSET(decr_status);
+ if (!(regs_spill[qw].slot[0] & SPU_DECR_STATUS_RUNNING))
+ return;
+
+ qw = LSCSA_QW_OFFSET(decr);
+ saved_decr = regs_spill[qw].slot[0];
+ spu_writech(SPU_WrDec, saved_decr);
+ }
+
+ /*
+ * Restore, Step 11:
+ * Write the saved PPU mailbox word (LSCSA ppu_mb, slot 0) to the
+ * SPU_WrOutMbox channel.
+ */
+ static inline void write_ppu_mb(void)
+ {
+ unsigned int qw = LSCSA_QW_OFFSET(ppu_mb);
+ unsigned int mbox = regs_spill[qw].slot[0];
+
+ spu_writech(SPU_WrOutMbox, mbox);
+ }
+
+ /*
+ * Restore, Step 12:
+ * Write the saved PPU interrupt mailbox word (LSCSA ppuint_mb, slot 0)
+ * to the SPU_WrOutIntrMbox channel.
+ */
+ static inline void write_ppuint_mb(void)
+ {
+ unsigned int qw = LSCSA_QW_OFFSET(ppuint_mb);
+ unsigned int mbox = regs_spill[qw].slot[0];
+
+ spu_writech(SPU_WrOutIntrMbox, mbox);
+ }
+
+ /*
+ * Reload the floating-point status and control register from the full
+ * quadword saved at the LSCSA fpcr slot.
+ */
+ static inline void restore_fpcr(void)
+ {
+ unsigned int offset;
+ vector unsigned int fpcr;
+
+ /* Restore, Step 13:
+ * Restore the floating-point status and control
+ * register from the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(fpcr);
+ fpcr = regs_spill[offset].v;
+ spu_mtfpscr(fpcr);
+ }
+
+ /*
+ * Restore, Step 14:
+ * Write the saved SRR0 word (LSCSA srr0, slot 0) to the SPU_WrSRR0
+ * channel.
+ */
+ static inline void restore_srr0(void)
+ {
+ unsigned int qw = LSCSA_QW_OFFSET(srr0);
+ unsigned int saved = regs_spill[qw].slot[0];
+
+ spu_writech(SPU_WrSRR0, saved);
+ }
+
+ /*
+ * Restore, Step 15:
+ * Write the saved event mask (LSCSA event_mask, slot 0) to the
+ * SPU_WrEventMask channel.
+ */
+ static inline void restore_event_mask(void)
+ {
+ unsigned int qw = LSCSA_QW_OFFSET(event_mask);
+ unsigned int saved = regs_spill[qw].slot[0];
+
+ spu_writech(SPU_WrEventMask, saved);
+ }
+
+ /*
+ * Restore, Step 16:
+ * Write the saved tag mask (LSCSA tag_mask, slot 0) to the
+ * MFC_WrTagMask channel.
+ */
+ static inline void restore_tag_mask(void)
+ {
+ unsigned int qw = LSCSA_QW_OFFSET(tag_mask);
+ unsigned int saved = regs_spill[qw].slot[0];
+
+ spu_writech(MFC_WrTagMask, saved);
+ }
+
+ /*
+ * restore_complete - patch the exit code so the saved stop state reappears
+ *
+ * Reads the stopped status (slot 0) and stop code (slot 1) saved in the
+ * LSCSA and overwrites up to four instruction words at exit_fini. Every
+ * patched sequence starts with the RESTORE_COMPLETE stop-and-signal; the
+ * words after it depend on the saved status. For a status matching none
+ * of the listed cases nothing is patched.
+ */
+ static inline void restore_complete(void)
+ {
+ extern void exit_fini(void);
+ unsigned int *exit_instrs = (unsigned int *)exit_fini;
+ unsigned int offset;
+ unsigned int stopped_status;
+ unsigned int stopped_code;
+
+ /* Restore, Step 18:
+ * Issue a stop-and-signal instruction with
+ * "good context restore" signal value.
+ *
+ * Restore, Step 19:
+ * There may be additional instructions placed
+ * here by the PPE Sequence for SPU Context
+ * Restore in order to restore the correct
+ * "stopped state".
+ *
+ * This step is handled here by analyzing the
+ * LSCSA.stopped_status and then modifying the
+ * exit() function to behave appropriately.
+ */
+
+ offset = LSCSA_QW_OFFSET(stopped_status);
+ stopped_status = regs_spill[offset].slot[0];
+ stopped_code = regs_spill[offset].slot[1];
+
+ switch (stopped_status) {
+ case SPU_STOPPED_STATUS_P_I:
+ /* SPU_Status[P,I]=1. Add illegal instruction
+ * followed by stop-and-signal instruction after
+ * end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_P_H:
+ /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed
+ * by stop-and-signal instruction after end of
+ * restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = HEQ_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_S_P:
+ /* SPU_Status[S,P]=1. Add stop-and-signal instruction
+ * followed by nop and 'br -4' after end of restore
+ * code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = STOP_INSTR | stopped_code;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_S_I:
+ /* SPU_Status[S,I]=1. Add illegal instruction
+ * followed by 'br -4' after end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_I:
+ /* SPU_Status[I]=1. Add illegal instruction followed
+ * by infinite loop after end of restore sequence.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_S:
+ /* SPU_Status[S]=1. Add two 'nop' instructions
+ * followed by 'br -4'.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = NOP_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_H:
+ /* SPU_Status[H]=1. Add 'heq $0, $0' instruction
+ * after end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = HEQ_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_P:
+ /* SPU_Status[P]=1. Add stop-and-signal instruction
+ * after end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_R:
+ /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = NOP_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ default:
+ /* SPU_Status[R]=1. No additional instructions. */
+ break;
+ }
+ /* Issued after the instruction words above have been rewritten. */
+ spu_sync();
+ }
+
+/**
+ * main - entry point for SPU-side context restore.
+ *
+ * This code deviates from the documented sequence in the
+ * following aspects:
+ *
+ * 1. The EA for LSCSA is passed from PPE in the
+ * signal notification channels.
+ * 2. The register spill area is pulled by SPU
+ * into LS, rather than pushed by PPE.
+ * 3. All 128 registers are restored by exit().
+ * 4. The exit() function is modified at run
+ * time in order to properly restore the
+ * SPU_Status register.
+ */
+int main()
+{
+ addr64 lscsa_ea;
+
+ lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+ lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+ fetch_regs_from_mem(lscsa_ea);
+
+ set_event_mask(); /* Step 1. */
+ set_tag_mask(); /* Step 2. */
+ build_dma_list(lscsa_ea); /* Step 3. */
+ restore_upper_240kb(lscsa_ea); /* Step 4. */
+ /* Step 5: done by 'exit'. */
+ enqueue_putllc(lscsa_ea); /* Step 7. */
+ set_tag_update(); /* Step 8. */
+ read_tag_status(); /* Step 9. */
+ restore_decr(); /* moved Step 6. */
+ read_llar_status(); /* Step 10. */
+ write_ppu_mb(); /* Step 11. */
+ write_ppuint_mb(); /* Step 12. */
+ restore_fpcr(); /* Step 13. */
+ restore_srr0(); /* Step 14. */
+ restore_event_mask(); /* Step 15. */
+ restore_tag_mask(); /* Step 16. */
+ /* Step 17. done by 'exit'. */
+ restore_complete(); /* Step 18. */
+
+ return 0;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
new file mode 100644
index 000000000..2905949de
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
@@ -0,0 +1,116 @@
+/*
+ * crt0_r.S: Entry function for SPU-side context restore.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry and exit function for SPU-side of the context restore
+ * sequence. Sets up an initial stack frame, then branches to
+ * 'main'. On return, restores all 128 registers from the LSCSA
+ * and exits.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7 /* NOTE(review): presumably 2^7 = 128-byte alignment - confirm for this assembler */
+.globl regs_spill
+regs_spill: /* register spill area; the C side declares it as spu_reg128v regs_spill[] */
+.space SIZEOF_SPU_SPILL_REGS, 0x0 /* reserve SIZEOF_SPU_SPILL_REGS bytes, filled with zero */
+
+.text
+.global _start
+_start:
+ /* Initialize the stack pointer to point to 16368
+ * (16kb-16). The back chain pointer is initialized
+ * to NULL.
+ *
+ * $0 is loaded with zero and stored to the quadword
+ * that the new $SP refers to.
+ */
+ il $0, 0
+ il $SP, 16368
+ stqd $0, 0($SP)
+
+ /* Allocate a minimum stack frame for the called main.
+ * This is needed so that main has a place to save the
+ * link register when it calls another function.
+ *
+ * The current $SP is first stored 160 bytes below
+ * itself, then $SP is lowered by the same 160 bytes.
+ */
+ stqd $SP, -160($SP)
+ ai $SP, $SP, -160
+
+ /* Call the program's main function.
+ *
+ * No instruction follows this branch-and-link before
+ * the 'exit' label, so when main returns execution
+ * continues directly into 'exit'.
+ */
+ brsl $0, main
+
+.global exit
+.global _exit
+exit:
+_exit:
+ /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs.
+ *
+ * $3 walks the spill area, starting 256 bytes in (past the
+ * 16 quadwords that Step 17 below reloads) and advancing 64
+ * bytes (four quadwords) per pass.
+ *
+ * The loop is self-modifying: the quadword holding the four
+ * 'lqd' instructions at restore_reg_insts is loaded into $4,
+ * 4 is added to it on every pass, and it is stored back over
+ * the instructions after they have executed. The next pass
+ * therefore runs patched copies of the four loads.
+ * NOTE(review): this presumably bumps each load's target
+ * register by four ($20-$23, $24-$27, ...) and ends once the
+ * low 7 bits tested by 'andi ... 0x7F' wrap to zero; that
+ * relies on the register number occupying the low 7 bits of
+ * the instruction encoding - confirm against the SPU ISA.
+ *
+ * The .balignl pads with the word 0x40200000 (presumably a
+ * no-op encoding - confirm) up to the 16-byte boundary that
+ * lqr/stqr on restore_reg_insts depend on.
+ */
+ ila $3, regs_spill + 256
+restore_regs:
+ lqr $4, restore_reg_insts
+restore_reg_loop:
+ ai $4, $4, 4
+ .balignl 16, 0x40200000
+restore_reg_insts: /* must be quad-word aligned. */
+ lqd $16, 0($3)
+ lqd $17, 16($3)
+ lqd $18, 32($3)
+ lqd $19, 48($3)
+ andi $5, $4, 0x7F
+ stqr $4, restore_reg_insts
+ ai $3, $3, 64
+ brnz $5, restore_reg_loop
+
+ /* SPU Context Restore Step 17: Restore the first 16 GPRs.
+ *
+ * These are reloaded from the first 256 bytes of regs_spill
+ * only after the loop above has finished, since that loop
+ * uses $3, $4 and $5 as scratch registers.
+ */
+ lqa $0, regs_spill + 0
+ lqa $1, regs_spill + 16
+ lqa $2, regs_spill + 32
+ lqa $3, regs_spill + 48
+ lqa $4, regs_spill + 64
+ lqa $5, regs_spill + 80
+ lqa $6, regs_spill + 96
+ lqa $7, regs_spill + 112
+ lqa $8, regs_spill + 128
+ lqa $9, regs_spill + 144
+ lqa $10, regs_spill + 160
+ lqa $11, regs_spill + 176
+ lqa $12, regs_spill + 192
+ lqa $13, regs_spill + 208
+ lqa $14, regs_spill + 224
+ lqa $15, regs_spill + 240
+
+ /* Under normal circumstances, the 'exit' function
+ * terminates with 'stop SPU_RESTORE_COMPLETE',
+ * indicating that the SPU-side restore code has
+ * completed.
+ *
+ * However it is possible that instructions immediately
+ * following the 'stop 0x3ffc' have been modified at run
+ * time so as to recreate the exact SPU_Status settings
+ * from the application, e.g. illegal instruction, halt,
+ * etc.
+ *
+ * The three 'stop 0' words below are the placeholders
+ * for those replacement instructions.
+ * NOTE(review): presumably this four-word area is what
+ * spu_restore.c patches through exit_instrs[0..3] -
+ * confirm against that file.
+ */
+.global exit_fini
+.global _exit_fini
+exit_fini:
+_exit_fini:
+ stop SPU_RESTORE_COMPLETE
+ stop 0
+ stop 0
+ stop 0
+
+ /* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 000000000..f383b027e
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */
+static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33001180,
+0x42034003,
+0x33800284,
+0x1c010204,
+0x40200000,
+0x40200000,
+0x40200000,
+0x34000190,
+0x34004191,
+0x34008192,
+0x3400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffa85,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
+0x00003ffc,
+0x00000000,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb0a14103,
+0x01a00204,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
+0x3fe30102,
+0x21a00882,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x04000203,
+0x21a00903,
+0x40848002,
+0x21a00982,
+0x40800003,
+0x21a00a03,
+0x40802002,
+0x21a00a82,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
+0x1cffc489,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
+0x40802202,
+0x21a00a82,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
+0x3f200102,
+0x3fbe0102,
+0x3fe30102,
+0x21a00902,
+0x40804003,
+0x21a00983,
+0x21a00a05,
+0x40805a02,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
+0x21a00e02,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
+0x21a00702,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
+0x21a00b02,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
+0x4020007f,
+0x35000100,
+0x00000470,
+0x000002f8,
+0x00000430,
+0x00000360,
+0x000002f8,
+0x000003c8,
+0x000004a8,
+0x00000298,
+0x00000360,
+0x00200000,
+0x409ffe02,
+0x30801203,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
+0x21a00382,
+0x4020007f,
+0x327fde00,
+0x409ffe02,
+0x30801203,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38818282,
+0x41004003,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
+0x41193f83,
+0x60ffc003,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
+0x20801203,
+0x3881c282,
+0x41193f83,
+0x60ffc003,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
+0x4020007f,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
+0x4020007f,
+0x327fd580,
+0x409ffe02,
+0x1000658e,
+0x40800206,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38818282,
+0x4020007f,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
+0x4020007f,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
+0x409ffe02,
+0x1000588b,
+0x40800208,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e00,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c b/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c
new file mode 100644
index 000000000..ae95cc170
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save.c
@@ -0,0 +1,195 @@
+/*
+ * spu_save.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context save sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE 0x40000 /* 256K (in bytes); fallback used only when LS_SIZE is not already defined */
+#endif
+
+typedef unsigned int u32; /* NOTE(review): not used in this file's own code; presumably needed by <asm/spu_csa.h> - confirm */
+typedef unsigned long long u64; /* NOTE(review): same as u32 above - confirm */
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+static inline void save_event_mask(void)
+{
+	/* Save, Step 2:
+	 *    Capture the value of the SPU_RdEventMask channel in
+	 *    word slot 0 of the LSCSA quadword reserved for
+	 *    event_mask.
+	 */
+	unsigned int qw_index = LSCSA_QW_OFFSET(event_mask);
+
+	regs_spill[qw_index].slot[0] = spu_readch(SPU_RdEventMask);
+}
+
+static inline void save_tag_mask(void)
+{
+	/* Save, Step 3:
+	 *    Capture the value of the MFC_RdTagMask channel in
+	 *    word slot 0 of the LSCSA quadword reserved for
+	 *    tag_mask.
+	 */
+	unsigned int qw_index = LSCSA_QW_OFFSET(tag_mask);
+
+	regs_spill[qw_index].slot[0] = spu_readch(MFC_RdTagMask);
+}
+
+static inline void save_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int putl_cmd = 0x24;	/* PUTL */
+	unsigned int dma_tag = 0;
+	unsigned int list_bytes = sizeof(dma_list);
+	unsigned int list_addr = (unsigned int)dma_list;
+	unsigned int ls_start = 16384;
+
+	/* Save, Step 7:
+	 *    Queue a PUTL (put with DMA list) on tag 0 so that the
+	 *    remaining 240 kb of LS, i.e. everything from local
+	 *    store offset 16384 upwards, is transferred to the CSA.
+	 *    The list itself is dma_list; its local-store address
+	 *    goes into MFC_EAL and its byte size into MFC_Size.
+	 *    Only the upper word of the LSCSA EA is needed here.
+	 */
+	spu_writech(MFC_LSA, ls_start);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list_addr);
+	spu_writech(MFC_Size, list_bytes);
+	spu_writech(MFC_TagID, dma_tag);
+	spu_writech(MFC_Cmd, putl_cmd);
+}
+
+static inline void save_fpcr(void)
+{
+	/* Save, Step 9:
+	 *    Read the floating-point status and control register
+	 *    with spu_mffpscr() and keep the whole vector in the
+	 *    LSCSA quadword reserved for fpcr.
+	 */
+	unsigned int qw_index = LSCSA_QW_OFFSET(fpcr);
+
+	regs_spill[qw_index].v = spu_mffpscr();
+}
+
+static inline void save_decr(void)
+{
+	/* Save, Step 10:
+	 *    Capture the value of the SPU_RdDec channel in word
+	 *    slot 0 of the LSCSA quadword reserved for decr.
+	 */
+	unsigned int qw_index = LSCSA_QW_OFFSET(decr);
+
+	regs_spill[qw_index].slot[0] = spu_readch(SPU_RdDec);
+}
+
+static inline void save_srr0(void)
+{
+	/* Save, Step 11:
+	 *    Capture the value of the SPU_RdSRR0 channel in word
+	 *    slot 0 of the LSCSA quadword reserved for srr0.
+	 */
+	unsigned int qw_index = LSCSA_QW_OFFSET(srr0);
+
+	regs_spill[qw_index].slot[0] = spu_readch(SPU_RdSRR0);
+}
+
+static inline void spill_regs_to_mem(addr64 lscsa_ea)
+{
+	unsigned int put_cmd = 0x20;	/* PUT */
+	unsigned int dma_tag = 0;
+	unsigned int spill_bytes = sizeof(regs_spill);
+	unsigned int spill_addr = (unsigned int)regs_spill;
+
+	/* Save, Step 13:
+	 *    Queue a PUT on tag 0 that copies the whole regs_spill
+	 *    area (the LSCSA) from local store to the CSA at the
+	 *    effective address given by lscsa_ea.
+	 */
+	spu_writech(MFC_LSA, spill_addr);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, spill_bytes);
+	spu_writech(MFC_TagID, dma_tag);
+	spu_writech(MFC_Cmd, put_cmd);
+}
+
+static inline void enqueue_sync(addr64 lscsa_ea)
+{
+	unsigned int sync_cmd = 0xCC;
+	unsigned int dma_tag = 0;
+
+	/* Save, Step 14:
+	 *    Queue an MFC_SYNC command on tag 0. Only the tag and
+	 *    command channels are written; lscsa_ea is not used.
+	 */
+	spu_writech(MFC_TagID, dma_tag);
+	spu_writech(MFC_Cmd, sync_cmd);
+}
+
+static inline void save_complete(void)
+{
+	/* Save, Step 18:
+	 *    Signal "save complete" by executing stop-and-signal
+	 *    with the SPU_SAVE_COMPLETE code.  Note: control does
+	 *    not come back from this call!!
+	 */
+	spu_stop(SPU_SAVE_COMPLETE);
+}
+
+/**
+ * main - entry point for SPU-side context save.
+ *
+ * Reads the effective address (EA) of the LSCSA from the two signal
+ * notification channels (SPU_RdSigNotify1 fills ui[0], SPU_RdSigNotify2
+ * fills ui[1]) and then runs the save steps in the order listed in
+ * the body.
+ *
+ * This code deviates from the documented sequence as follows:
+ *
+ *	1. The EA for LSCSA is passed from PPE in the
+ *	   signal notification channels.
+ *	2. All 128 registers are saved by crt0.o.
+ *
+ * The parameter list is spelled (void) so that this is a real
+ * prototype; crt0 branches here without setting up any arguments.
+ *
+ * Return: 0, but only if save_complete() comes back, which its own
+ * comment says it never does; crt0 treats a return from main as an
+ * error and executes 'stop 0x0'.
+ */
+int main(void)
+{
+	addr64 lscsa_ea;
+
+	/* The PPE passes the LSCSA address as two words, one per channel. */
+	lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+	lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+
+	/* Step 1: done by exit(). */
+	save_event_mask();		/* Step 2.  */
+	save_tag_mask();		/* Step 3.  */
+	set_event_mask();		/* Step 4.  */
+	set_tag_mask();			/* Step 5.  */
+	build_dma_list(lscsa_ea);	/* Step 6.  */
+	save_upper_240kb(lscsa_ea);	/* Step 7.  */
+	/* Step 8: done by exit(). */
+	save_fpcr();			/* Step 9.  */
+	save_decr();			/* Step 10. */
+	save_srr0();			/* Step 11. */
+	enqueue_putllc(lscsa_ea);	/* Step 12. */
+	spill_regs_to_mem(lscsa_ea);	/* Step 13. */
+	enqueue_sync(lscsa_ea);		/* Step 14. */
+	set_tag_update();		/* Step 15. */
+	read_tag_status();		/* Step 16. */
+	read_llar_status();		/* Step 17. */
+	save_complete();		/* Step 18. */
+
+	return 0;
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
new file mode 100644
index 000000000..6659d6a66
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
@@ -0,0 +1,102 @@
+/*
+ * crt0_s.S: Entry function for SPU-side context save.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry function for SPU-side of the context save sequence.
+ * Saves all 128 GPRs, sets up an initial stack frame, then
+ * branches to 'main'.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7 /* NOTE(review): presumably 2^7 = 128-byte alignment - confirm for this assembler */
+.globl regs_spill
+regs_spill: /* register spill area; the C side declares it as spu_reg128v regs_spill[] */
+.space SIZEOF_SPU_SPILL_REGS, 0x0 /* reserve SIZEOF_SPU_SPILL_REGS bytes, filled with zero */
+
+.text
+.global _start
+_start:
+ /* SPU Context Save Step 1: Save the first 16 GPRs.
+ *
+ * $0-$15 go to the first 256 bytes of regs_spill, one
+ * quadword each. This happens before anything else so
+ * that $3, $4 and $5 are already saved when the loop
+ * below starts using them as scratch registers.
+ */
+ stqa $0, regs_spill + 0
+ stqa $1, regs_spill + 16
+ stqa $2, regs_spill + 32
+ stqa $3, regs_spill + 48
+ stqa $4, regs_spill + 64
+ stqa $5, regs_spill + 80
+ stqa $6, regs_spill + 96
+ stqa $7, regs_spill + 112
+ stqa $8, regs_spill + 128
+ stqa $9, regs_spill + 144
+ stqa $10, regs_spill + 160
+ stqa $11, regs_spill + 176
+ stqa $12, regs_spill + 192
+ stqa $13, regs_spill + 208
+ stqa $14, regs_spill + 224
+ stqa $15, regs_spill + 240
+
+ /* SPU Context Save, Step 8: Save the remaining 112 GPRs.
+ *
+ * $3 walks the spill area, starting 256 bytes in and
+ * advancing 64 bytes (four quadwords) per pass.
+ *
+ * The loop is self-modifying: the quadword holding the four
+ * 'stqd' instructions at save_reg_insts is loaded into $4,
+ * 4 is added to it on every pass, and it is stored back over
+ * the instructions after they have executed. The next pass
+ * therefore runs patched copies of the four stores.
+ * NOTE(review): this presumably bumps each store's source
+ * register by four ($20-$23, $24-$27, ...) and ends once the
+ * low 7 bits tested by 'andi ... 0x7F' wrap to zero; that
+ * relies on the register number occupying the low 7 bits of
+ * the instruction encoding - confirm against the SPU ISA.
+ *
+ * The .balignl pads with the word 0x40200000 (presumably a
+ * no-op encoding - confirm) up to the 16-byte boundary that
+ * lqr/stqr on save_reg_insts depend on.
+ */
+ ila $3, regs_spill + 256
+save_regs:
+ lqr $4, save_reg_insts
+save_reg_loop:
+ ai $4, $4, 4
+ .balignl 16, 0x40200000
+save_reg_insts: /* must be quad-word aligned. */
+ stqd $16, 0($3)
+ stqd $17, 16($3)
+ stqd $18, 32($3)
+ stqd $19, 48($3)
+ andi $5, $4, 0x7F
+ stqr $4, save_reg_insts
+ ai $3, $3, 64
+ brnz $5, save_reg_loop
+
+ /* Initialize the stack pointer to point to 16368
+ * (16kb-16). The back chain pointer is initialized
+ * to NULL.
+ *
+ * $0 is loaded with zero and stored to the quadword
+ * that the new $SP refers to.
+ */
+ il $0, 0
+ il $SP, 16368
+ stqd $0, 0($SP)
+
+ /* Allocate a minimum stack frame for the called main.
+ * This is needed so that main has a place to save the
+ * link register when it calls another function.
+ *
+ * The current $SP is first stored 160 bytes below
+ * itself, then $SP is lowered by the same 160 bytes.
+ */
+ stqd $SP, -160($SP)
+ ai $SP, $SP, -160
+
+ /* Call the program's main function.
+ *
+ * No instruction follows this branch-and-link before
+ * the 'exit' label, so a return from main continues
+ * directly into 'exit'.
+ */
+ brsl $0, main
+
+ /* In this case main should not return; if it does
+ * there has been an error in the sequence. Execute
+ * stop-and-signal with code=0.
+ *
+ * 'exit' and '_exit' are two global names for the same
+ * address, which is also where execution lands after
+ * the 'brsl $0, main' above if main returns.
+ */
+.global exit
+.global _exit
+exit:
+_exit:
+ stop 0x0
+
+ /* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
+
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
new file mode 100644
index 000000000..b9f81ac8a
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
@@ -0,0 +1,743 @@
+/*
+ * spu_save_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_save.c.
+ * Do not edit!
+ */
+static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = {
+0x20805000,
+0x20805201,
+0x20805402,
+0x20805603,
+0x20805804,
+0x20805a05,
+0x20805c06,
+0x20805e07,
+0x20806008,
+0x20806209,
+0x2080640a,
+0x2080660b,
+0x2080680c,
+0x20806a0d,
+0x20806c0e,
+0x20806e0f,
+0x4201c003,
+0x33800184,
+0x1c010204,
+0x40200000,
+0x24000190,
+0x24004191,
+0x24008192,
+0x2400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffb85,
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33000180,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb1c38103,
+0x01a00204,
+0x3ec10082,
+0x4201400d,
+0xb1c38202,
+0x01a00583,
+0x34218682,
+0x3ed80684,
+0xb0408184,
+0x24218682,
+0x01a00603,
+0x00200000,
+0x34214682,
+0x3ed40684,
+0xb0408184,
+0x40800003,
+0x24214682,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x4020007f,
+0x1000251e,
+0x42a00002,
+0x32800008,
+0x4205c00c,
+0x00200000,
+0x40a0000b,
+0x3f82070f,
+0x4080020a,
+0x40800709,
+0x3fe3078f,
+0x3fbf0783,
+0x3f200183,
+0x3fbe0183,
+0x3fe30187,
+0x18008387,
+0x4205c002,
+0x3ac30404,
+0x1cffc489,
+0x00200000,
+0x18008403,
+0x38830402,
+0x4cffc486,
+0x3ac28185,
+0xb0408584,
+0x28830402,
+0x1c020408,
+0x38828182,
+0xb0408385,
+0x1802c387,
+0x28828182,
+0x217ff886,
+0x04000582,
+0x32800007,
+0x21a00802,
+0x3fbf0705,
+0x3f200285,
+0x3fbe0285,
+0x3fe30285,
+0x21a00885,
+0x04000603,
+0x21a00903,
+0x40803c02,
+0x21a00982,
+0x04000386,
+0x21a00a06,
+0x40801202,
+0x21a00a82,
+0x73000003,
+0x24200683,
+0x01a00404,
+0x00200000,
+0x34204682,
+0x3ec40683,
+0xb0408203,
+0x24204682,
+0x01a00783,
+0x00200000,
+0x3421c682,
+0x3edc0684,
+0xb0408184,
+0x2421c682,
+0x21a00806,
+0x21a00885,
+0x3fbf0784,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x21a00904,
+0x40804002,
+0x21a00982,
+0x21a00a06,
+0x40805a02,
+0x21a00a82,
+0x04000683,
+0x21a00803,
+0x21a00885,
+0x21a00904,
+0x40848002,
+0x21a00982,
+0x21a00a06,
+0x40801002,
+0x21a00a82,
+0x21a00a06,
+0x40806602,
+0x00200000,
+0x35800009,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x01a00d83,
+0x00003ffb,
+0x40800003,
+0x4020007f,
+0x35000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h b/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h
new file mode 100644
index 000000000..58359feb6
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spu_utils.h
@@ -0,0 +1,160 @@
+/*
+ * spu_utils.h: Utilities for the SPU side of the context switch operation.
+ *
+ * (C) Copyright IBM 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _SPU_CONTEXT_UTILS_H_
+#define _SPU_CONTEXT_UTILS_H_
+
+/*
+ * 64-bit safe EA (effective address), viewable as two 32-bit words.
+ */
+typedef union {
+ unsigned long long ull;
+ unsigned int ui[2]; /* ui[0] feeds MFC_EAH, ui[1] feeds MFC_EAL */
+} addr64;
+
+/*
+ * 128-bit register template: four 32-bit slots overlaying one vector.
+ */
+typedef union {
+ unsigned int slot[4];
+ vector unsigned int v;
+} spu_reg128v;
+
+/*
+ * DMA list structure: one element of the list built by build_dma_list().
+ */
+struct dma_list_elem {
+ unsigned int size; /* build_dma_list() sets this to 16384 */
+ unsigned int ea_low; /* derived from the low word (ui[1]) of the LSCSA EA */
+};
+
+/*
+ * Defines storage for the 8-byte aligned, 15-element DMA list (see build_dma_list()).
+ */
+struct dma_list_elem dma_list[15] __attribute__ ((aligned(8)));
+
+/*
+ * External declaration for spill storage
+ * that, per the original note, lives in crt0.
+ */
+extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS];
+
+/*
+ * Compute LSCSA byte offset for a given field, measured from gprs[0].slot[0].
+ */
+static struct spu_lscsa *dummy = (struct spu_lscsa *)0;
+#define LSCSA_BYTE_OFFSET(_field) \
+ ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0])))
+#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) /* byte offset / 16 */
+
+static inline void set_event_mask(void)
+{
+	/* Save, Step 4:
+	 * Restore, Step 1:
+	 *    Write zero to the SPU_WrEventMask channel so
+	 *    that all events are masked.
+	 */
+	unsigned int no_events = 0;
+
+	spu_writech(SPU_WrEventMask, no_events);
+}
+
+static inline void set_tag_mask(void)
+{
+	/* Save, Step 5:
+	 * Restore, Step 2:
+	 *    Write '01' to the MFC_WrTagMask channel so that
+	 *    only tag group 0 is unmasked.
+	 */
+	unsigned int tag_group0_only = 1;
+
+	spu_writech(MFC_WrTagMask, tag_group0_only);
+}
+
+static inline void build_dma_list(addr64 lscsa_ea)
+{
+	unsigned int chunk_ea;
+	int idx;
+
+	/* Save, Step 6:
+	 * Restore, Step 3:
+	 *    Fill the pre-canned DMA list in local storage: 15
+	 *    elements of 16384 bytes, starting at the ls[16384] field.
+	 */
+	chunk_ea = lscsa_ea.ui[1] + LSCSA_BYTE_OFFSET(ls[16384]);
+
+	for (idx = 0; idx < 15; idx++) {
+		dma_list[idx].ea_low = chunk_ea;
+		dma_list[idx].size = 16384;
+		chunk_ea += 16384;
+	}
+}
+
+static inline void enqueue_putllc(addr64 lscsa_ea)
+{
+	unsigned int putllc_opcode = 0xB4;	/* PUTLLC */
+	unsigned int xfer_size = 128;
+	unsigned int ls_offset = 0;
+	unsigned int tag = 0;
+
+	/* Save, Step 12:
+	 * Restore, Step 7:
+	 *    Queue a PUTLLC (tag 0) command to the MFC, aimed at
+	 *    an effective address in the CSA, in order to remove
+	 *    any possible lock-line reservation.
+	 */
+	spu_writech(MFC_LSA, ls_offset);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, xfer_size);
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, putllc_opcode);
+}
+
+static inline void set_tag_update(void)
+{
+	/* Save, Step 15:
+	 * Restore, Step 8:
+	 *    Write '01' to the MFC_WrTagUpdate channel.
+	 */
+	unsigned int tag_update_any = 1;
+
+	spu_writech(MFC_WrTagUpdate, tag_update_any);
+}
+
+static inline void read_tag_status(void)
+{
+	/* Save, Step 16:
+	 * Restore, Step 9:
+	 *    Read the MFC_RdTagStat channel; the value is discarded.
+	 */
+	(void)spu_readch(MFC_RdTagStat);
+}
+
+static inline void read_llar_status(void)
+{
+	/* Save, Step 17:
+	 * Restore, Step 10:
+	 *    Read the MFC_RdAtomicStat channel; the value is discarded.
+	 */
+	(void)spu_readch(MFC_RdAtomicStat);
+}
+
+#endif /* _SPU_CONTEXT_UTILS_H_ */
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/spufs.h b/kernel/arch/powerpc/platforms/cell/spufs/spufs.h
new file mode 100644
index 000000000..bcfd6f063
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -0,0 +1,376 @@
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef SPUFS_H
+#define SPUFS_H
+
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cpumask.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+
+#define SPUFS_PS_MAP_SIZE 0x20000 /* 128 KiB */
+#define SPUFS_MFC_MAP_SIZE 0x1000 /* 4 KiB */
+#define SPUFS_CNTL_MAP_SIZE 0x1000 /* 4 KiB */
+#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE
+#define SPUFS_MSS_MAP_SIZE 0x1000 /* 4 KiB */
+
+/* The magic number for our file system (SPUFS_MAGIC) */
+enum {
+ SPUFS_MAGIC = 0x23c9b64e,
+};
+
+struct spu_context_ops;
+struct spu_gang;
+
+/* Values for ctx->sched_flags (an unsigned long in struct spu_context) */
+enum {
+ SPU_SCHED_NOTIFY_ACTIVE,
+ SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
+ SPU_SCHED_SPU_RUN, /* context is within spu_run */
+};
+
+enum {
+ SWITCH_LOG_BUFSIZE = 4096, /* NOTE(review): unit (bytes vs. entries) not visible here */
+};
+
+enum {
+ SWITCH_LOG_START, /* presumably the 'type' given to spu_switch_log_notify() - confirm */
+ SWITCH_LOG_STOP,
+ SWITCH_LOG_EXIT,
+};
+
+struct switch_log {
+ wait_queue_head_t wait;
+ unsigned long head; /* presumably the producer index into log[] - confirm */
+ unsigned long tail; /* presumably the consumer index into log[] - confirm */
+ struct switch_log_entry {
+ struct timespec tstamp;
+ s32 spu_id;
+ u32 type;
+ u32 val;
+ u64 timebase;
+ } log[]; /* flexible array member: entries follow the header */
+};
+
+struct spu_context {
+ struct spu *spu; /* pointer to a physical SPU */
+ struct spu_state csa; /* SPU context save area. */
+ spinlock_t mmio_lock; /* protects mmio access */
+ struct address_space *local_store; /* local store mapping. */
+ struct address_space *mfc; /* 'mfc' area mappings. */
+ struct address_space *cntl; /* 'control' area mappings. */
+ struct address_space *signal1; /* 'signal1' area mappings. */
+ struct address_space *signal2; /* 'signal2' area mappings. */
+ struct address_space *mss; /* 'mss' area mappings. */
+ struct address_space *psmap; /* 'psmap' area mappings. */
+ struct mutex mapping_lock;
+ u64 object_id; /* user space pointer for oprofile */
+
+ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
+ struct mutex state_mutex; /* taken by spu_acquire(), dropped by spu_release() */
+ struct mutex run_mutex;
+
+ struct mm_struct *owner;
+
+ struct kref kref;
+ wait_queue_head_t ibox_wq;
+ wait_queue_head_t wbox_wq;
+ wait_queue_head_t stop_wq;
+ wait_queue_head_t mfc_wq;
+ wait_queue_head_t run_wq;
+ struct fasync_struct *ibox_fasync;
+ struct fasync_struct *wbox_fasync;
+ struct fasync_struct *mfc_fasync;
+ u32 tagwait;
+ struct spu_context_ops *ops;
+ struct work_struct reap_work;
+ unsigned long flags;
+ unsigned long event_return;
+
+ struct list_head gang_list;
+ struct spu_gang *gang;
+ struct kref *prof_priv_kref;
+ void ( * prof_priv_release) (struct kref *kref);
+
+ /* owner thread */
+ pid_t tid; /* reported as owner_tid by the spufs_context trace event */
+
+ /* scheduler fields */
+ struct list_head rq;
+ unsigned int time_slice;
+ unsigned long sched_flags; /* SPU_SCHED_* values */
+ cpumask_t cpus_allowed;
+ int policy;
+ int prio;
+ int last_ran;
+
+ /* statistics */
+ struct {
+ /* updates protected by ctx->state_mutex */
+ enum spu_utilization_state util_state;
+ unsigned long long tstamp; /* time of last state switch */
+ unsigned long long times[SPU_UTIL_MAX];
+ unsigned long long vol_ctx_switch;
+ unsigned long long invol_ctx_switch;
+ unsigned long long min_flt;
+ unsigned long long maj_flt;
+ unsigned long long hash_flt;
+ unsigned long long slb_flt;
+ unsigned long long slb_flt_base; /* # at last ctx switch */
+ unsigned long long class2_intr;
+ unsigned long long class2_intr_base; /* # at last ctx switch */
+ unsigned long long libassist;
+ } stats;
+
+ /* context switch log */
+ struct switch_log *switch_log;
+
+ struct list_head aff_list;
+ int aff_head;
+ int aff_offset;
+};
+
+struct spu_gang {
+ struct list_head list;
+ struct mutex mutex;
+ struct kref kref;
+ int contexts;
+
+ struct spu_context *aff_ref_ctx;
+ struct list_head aff_list_head;
+ struct mutex aff_mutex;
+ int aff_flags; /* AFF_OFFSETS_SET and/or AFF_MERGED */
+ struct spu *aff_ref_spu;
+ atomic_t aff_sched_count;
+};
+
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET 1
+#define AFF_MERGED 2
+
+struct mfc_dma_command { /* argument of spu_context_ops.send_mfc_command() */
+ int32_t pad; /* reserved */
+ uint32_t lsa; /* local storage address */
+ uint64_t ea; /* effective address */
+ uint16_t size; /* transfer size */
+ uint16_t tag; /* command tag */
+ uint16_t class; /* class ID */
+ uint16_t cmd; /* command opcode */
+};
+
+
+/* SPU context query/set operations; instances declared below: spu_hw_ops, spu_backing_ops. */
+struct spu_context_ops {
+ int (*mbox_read) (struct spu_context * ctx, u32 * data);
+ u32(*mbox_stat_read) (struct spu_context * ctx);
+ unsigned int (*mbox_stat_poll)(struct spu_context *ctx,
+ unsigned int events);
+ int (*ibox_read) (struct spu_context * ctx, u32 * data);
+ int (*wbox_write) (struct spu_context * ctx, u32 data);
+ u32(*signal1_read) (struct spu_context * ctx);
+ void (*signal1_write) (struct spu_context * ctx, u32 data);
+ u32(*signal2_read) (struct spu_context * ctx);
+ void (*signal2_write) (struct spu_context * ctx, u32 data);
+ void (*signal1_type_set) (struct spu_context * ctx, u64 val);
+ u64(*signal1_type_get) (struct spu_context * ctx);
+ void (*signal2_type_set) (struct spu_context * ctx, u64 val);
+ u64(*signal2_type_get) (struct spu_context * ctx);
+ u32(*npc_read) (struct spu_context * ctx);
+ void (*npc_write) (struct spu_context * ctx, u32 data);
+ u32(*status_read) (struct spu_context * ctx);
+ char*(*get_ls) (struct spu_context * ctx);
+ void (*privcntl_write) (struct spu_context *ctx, u64 data);
+ u32 (*runcntl_read) (struct spu_context * ctx);
+ void (*runcntl_write) (struct spu_context * ctx, u32 data);
+ void (*runcntl_stop) (struct spu_context * ctx);
+ void (*master_start) (struct spu_context * ctx);
+ void (*master_stop) (struct spu_context * ctx);
+ int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
+ u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
+ u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+ int (*send_mfc_command)(struct spu_context * ctx,
+ struct mfc_dma_command * cmd);
+ void (*dma_info_read) (struct spu_context * ctx,
+ struct spu_dma_info * info);
+ void (*proxydma_info_read) (struct spu_context * ctx,
+ struct spu_proxydma_info * info);
+ void (*restart_dma)(struct spu_context *ctx);
+};
+
+extern struct spu_context_ops spu_hw_ops;
+extern struct spu_context_ops spu_backing_ops;
+
+struct spufs_inode_info {
+ struct spu_context *i_ctx;
+ struct spu_gang *i_gang;
+ struct inode vfs_inode; /* embedded inode; SPUFS_I() recovers the container from it */
+ int i_openers;
+};
+#define SPUFS_I(inode) \
+ container_of(inode, struct spufs_inode_info, vfs_inode)
+
+struct spufs_tree_descr { /* element type of the spufs_dir_*contents[] tables */
+ const char *name;
+ const struct file_operations *ops;
+ umode_t mode;
+ size_t size;
+};
+
+extern const struct spufs_tree_descr spufs_dir_contents[];
+extern const struct spufs_tree_descr spufs_dir_nosched_contents[];
+extern const struct spufs_tree_descr spufs_dir_debug_contents[];
+
+/* system call implementation */
+extern struct spufs_calls spufs_calls;
+struct coredump_params;
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
+ umode_t mode, struct file *filp);
+/* ELF coredump callbacks for writing SPU ELF notes */
+extern int spufs_coredump_extra_notes_size(void);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
+
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
+
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+int spufs_handle_class0(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
+/* context management */
+extern atomic_t nr_spu_contexts;
+static inline int __must_check spu_acquire(struct spu_context *ctx)
+{
+ return mutex_lock_interruptible(&ctx->state_mutex); /* paired with spu_release() */
+}
+
+static inline void spu_release(struct spu_context *ctx)
+{
+ mutex_unlock(&ctx->state_mutex); /* drops the mutex taken by spu_acquire() */
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
+void destroy_spu_context(struct kref *kref);
+struct spu_context * get_spu_context(struct spu_context *ctx);
+int put_spu_context(struct spu_context *ctx);
+void spu_unmap_mappings(struct spu_context *ctx);
+
+void spu_forget(struct spu_context *ctx);
+int __must_check spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
+
+int spu_stopped(struct spu_context *ctx, u32 * stat);
+void spu_del_from_rq(struct spu_context *ctx);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
+void spu_deactivate(struct spu_context *ctx);
+void spu_yield(struct spu_context *ctx);
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+ u32 type, u32 val);
+void spu_set_timeslice(struct spu_context *ctx);
+void spu_update_sched_info(struct spu_context *ctx);
+void __spu_update_sched_info(struct spu_context *ctx);
+int __init spu_sched_init(void);
+void spu_sched_exit(void);
+
+extern char *isolated_loader;
+
+/*
+ * spufs_wait
+ * Same as wait_event_interruptible(), except that we call
+ * spu_release(ctx) before sleeping and spu_acquire(ctx) when
+ * awoken. 'ctx' is taken from the caller's scope, not passed in.
+ *
+ * Evaluates to 0 with state_mutex held when the condition is met, or to
+ * -ERESTARTSYS / spu_acquire()'s error with the state_mutex dropped.
+ */
+
+#define spufs_wait(wq, condition) \
+({ \
+ int __ret = 0; \
+ DEFINE_WAIT(__wait); \
+ for (;;) { \
+ prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ spu_release(ctx); \
+ if (signal_pending(current)) { \
+ __ret = -ERESTARTSYS; \
+ break; \
+ } \
+ schedule(); \
+ __ret = spu_acquire(ctx); \
+ if (__ret) \
+ break; \
+ } \
+ finish_wait(&(wq), &__wait); \
+ __ret; \
+})
+
+size_t spu_wbox_write(struct spu_context *ctx, u32 data);
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
+
+/* irq callback funcs. */
+void spufs_ibox_callback(struct spu *spu);
+void spufs_wbox_callback(struct spu *spu);
+void spufs_stop_callback(struct spu *spu, int irq);
+void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+extern struct spu_coredump_calls spufs_coredump_calls;
+struct spufs_coredump_reader { /* element type of spufs_coredump_read[] */
+ char *name;
+ ssize_t (*read)(struct spu_context *ctx,
+ char __user *buffer, size_t size, loff_t *pos);
+ u64 (*get)(struct spu_context *ctx);
+ size_t size;
+};
+extern const struct spufs_coredump_reader spufs_coredump_read[];
+extern int spufs_coredump_num_notes;
+
+extern int spu_init_csa(struct spu_state *csa);
+extern void spu_fini_csa(struct spu_state *csa);
+extern int spu_save(struct spu_state *prev, struct spu *spu);
+extern int spu_restore(struct spu_state *new, struct spu *spu);
+extern int spu_switch(struct spu_state *prev, struct spu_state *new,
+ struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
+
+extern void spuctx_switch_state(struct spu_context *ctx,
+ enum spu_utilization_state new_state);
+
+#endif
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h b/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h
new file mode 100644
index 000000000..db2656aa4
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/sputrace.h
@@ -0,0 +1,39 @@
+#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPUFS_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM spufs
+
+TRACE_EVENT(spufs_context,
+ TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
+ TP_ARGS(ctx, spu, name),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(int, owner_tid)
+ __field(int, number)
+ ),
+
+ TP_fast_assign(
+ __entry->name = name; /* stores the pointer; the string is not copied */
+ __entry->owner_tid = ctx->tid;
+ __entry->number = spu ? spu->number : -1; /* -1 when spu is NULL */
+ ),
+
+ TP_printk("%s (ctxthread = %d, spu = %d)",
+ __entry->name, __entry->owner_tid, __entry->number)
+);
+
+#define spu_context_trace(name, ctx, spu) \
+ trace_spufs_context(ctx, spu, __stringify(name))
+#define spu_context_nospu_trace(name, ctx) \
+ trace_spufs_context(ctx, NULL, __stringify(name)) /* NULL spu: number is -1 */
+
+#endif /* _TRACE_SPUFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sputrace
+#include <trace/define_trace.h>
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/switch.c b/kernel/arch/powerpc/platforms/cell/spufs/switch.c
new file mode 100644
index 000000000..dde35551e
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/switch.c
@@ -0,0 +1,2222 @@
+/*
+ * spu_switch.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * Host-side part of SPU context switch sequence outlined in
+ * Synergistic Processor Element, Book IV.
+ *
+ * A fully preemptive switch of an SPE is very expensive in terms
+ * of time and system resources. SPE Book IV indicates that SPE
+ * allocation should follow a "serially reusable device" model,
+ * in which the SPE is assigned a task until it completes. When
+ * this is not possible, this sequence may be used to preemptively
+ * save, and then later (optionally) restore the context of a
+ * program executing on an SPE.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+
+#include "spufs.h"
+
+#include "spu_save_dump.h"
+#include "spu_restore_dump.h"
+
+#if 0
+#define POLL_WHILE_TRUE(_c) { \
+ do { \
+ } while (_c); \
+ }
+#else
+#define RELAX_SPIN_COUNT 1000
+#define POLL_WHILE_TRUE(_c) { \
+ do { \
+ int _i; \
+ for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
+ cpu_relax(); \
+ } \
+ if (unlikely(_c)) yield(); \
+ else break; \
+ } while (_c); \
+ }
+#endif /* debug */
+
+#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c))
+
+static inline void acquire_spu_lock(struct spu *spu)
+{
+ /* Save, Step 1:
+ * Restore, Step 1:
+ * Acquire SPU-specific mutual exclusion lock.
+ * TBD: not implemented, the body is empty.
+ */
+}
+
+static inline void release_spu_lock(struct spu *spu)
+{
+ /* Restore, Step 76:
+ * Release SPU-specific mutual exclusion lock.
+ * TBD: not implemented, the body is empty.
+ */
+}
+
+static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
+{
+	const u32 isolate_bits = SPU_STATUS_ISOLATED_STATE |
+				 SPU_STATUS_ISOLATED_LOAD_STATUS |
+				 SPU_STATUS_ISOLATED_EXIT_STATUS;
+	u32 status;
+
+	/* Save, Step 2:
+	 * Save, Step 6:
+	 *    Report 1 if any of SPU_Status[E,L,IS] is set (the SPU is in
+	 *    isolate state and cannot be context saved now), else 0.
+	 */
+	status = in_be32(&spu->problem->spu_status_R);
+	return (status & isolate_bits) != 0;
+}
+
+static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 3:
+ * Restore, Step 2:
+ * Save INT_Mask_class0/1/2 in the CSA (skipped when
+ * csa is NULL), all under spu->register_lock.
+ * Write INT_MASK_class0 with value of 0.
+ * Write INT_MASK_class1 with value of 0.
+ * Write INT_MASK_class2 with value of 0.
+ * Then flag the switch as pending and
+ * synchronize all three interrupts to be sure
+ * we no longer execute a handler on another CPU.
+ */
+ spin_lock_irq(&spu->register_lock);
+ if (csa) {
+ csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
+ csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
+ csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
+ }
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, 0ul);
+ spu_int_mask_set(spu, 2, 0ul);
+ eieio();
+ spin_unlock_irq(&spu->register_lock);
+
+ /*
+ * This flag needs to be set before calling synchronize_irq so
+ * that the update will be visible to the relevant handlers
+ * via a simple load.
+ */
+ set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+ clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+ synchronize_irq(spu->irqs[0]);
+ synchronize_irq(spu->irqs[1]);
+ synchronize_irq(spu->irqs[2]);
+}
+
+static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 4:
+ * Restore, Step 25.
+ * Set a software watchdog timer, which specifies the
+ * maximum allowable time for a context save sequence.
+ *
+ * For present, this implementation will not set a global
+ * watchdog timer, as virtualization & variable system load
+ * may cause unpredictable execution times; the body is empty.
+ */
+}
+
+static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 5:
+ * Restore, Step 3:
+ * Inhibit user-space access (if provided) to this
+ * SPU by unmapping the virtual pages assigned to
+ * the SPU memory-mapped I/O (MMIO) for problem
+ * state. TBD: not implemented, the body is empty.
+ */
+}
+
+static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 7:
+ * Restore, Step 5:
+ * Set a software context switch pending flag
+ * (done in Step 3 - disable_interrupts(); no-op here).
+ */
+}
+
+static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 8:
+ * Suspend DMA and save MFC_CNTL (saved only when csa is non-NULL).
+ */
+ switch (in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
+ case MFC_CNTL_SUSPEND_IN_PROGRESS: /* wait for the pending suspend to complete */
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+ /* fall through */
+ case MFC_CNTL_SUSPEND_COMPLETE: /* saved copy has the suspend request bit set */
+ if (csa)
+ csa->priv2.mfc_control_RW =
+ in_be64(&priv2->mfc_control_RW) |
+ MFC_CNTL_SUSPEND_DMA_QUEUE;
+ break;
+ case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: /* request the suspend, then wait for it */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+ if (csa)
+ csa->priv2.mfc_control_RW =
+ in_be64(&priv2->mfc_control_RW) &
+ ~MFC_CNTL_SUSPEND_DMA_QUEUE &
+ ~MFC_CNTL_SUSPEND_MASK;
+ break;
+ }
+}
+
+static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 9:
+	 *    Copy SPU_Runcntl from the problem-state
+	 *    registers into the CSA.  This value
+	 *    contains the "Application Desired
+	 *    State".
+	 */
+	csa->prob.spu_runcntl_RW = in_be32(&spu->problem->spu_runcntl_RW);
+}
+
+static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 10:
+ * Save MFC_SR1, as returned by spu_mfc_sr1_get(), in the CSA.
+ */
+ csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
+}
+
+static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 11:
+ * Read SPU_Status[R], and save to CSA; a running SPU is stopped first.
+ */
+ if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
+ csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+ } else {
+ u32 stopped;
+
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ stopped =
+ SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+ if ((in_be32(&prob->spu_status_R) & stopped) == 0)
+ csa->prob.spu_status_R = SPU_STATUS_RUNNING; /* no stop-reason bit set: record as running */
+ else
+ csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+ }
+}
+
+static inline void save_mfc_stopped_status(struct spu_state *csa,
+					   struct spu *spu)
+{
+	const u64 tracked = MFC_CNTL_DECREMENTER_RUNNING |
+			    MFC_CNTL_DMA_QUEUES_EMPTY;
+	u64 saved = csa->priv2.mfc_control_RW;
+
+	/* Save, Step 12:
+	 *    Refresh CSA.MFC_CNTL[Ds] from the live MFC_CNTL.
+	 *
+	 * update: do the same with MFC_CNTL[Q].
+	 */
+	saved &= ~tracked;
+	saved |= in_be64(&spu->priv2->mfc_control_RW) & tracked;
+	csa->priv2.mfc_control_RW = saved;
+}
+
+static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 13:
+	 *    Halt the decrementer by writing MFC_CNTL
+	 *    with [Dh] set to '1'.  The suspend mask
+	 *    bits are written along with it.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW,
+		 MFC_CNTL_DECREMENTER_HALTED |
+		 MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void save_timebase(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 14:
+ * Read PPE Timebase High and Timebase low registers and save
+ * in CSA. Currently stores get_cycles() in csa->suspend_time.
+ */
+ csa->suspend_time = get_cycles();
+}
+
+static inline void remove_other_spu_access(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Save, Step 15:
+ * Remove other SPU access to this SPU by unmapping this SPU's
+ * pages from their address space. TBD: not implemented.
+ */
+}
+
+static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 16:
+	 * Restore, Step 11.
+	 *    Write 1 to the SPU_MSSync register, then
+	 *    poll until SPU_MSSync[P] reads back as 0.
+	 */
+	out_be64(&spu->problem->spc_mssync_RW, 1UL);
+
+	POLL_WHILE_TRUE(in_be64(&spu->problem->spc_mssync_RW) &
+			MS_SYNC_PENDING);
+}
+
+static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 17:
+ * Restore, Step 12.
+ * Restore, Step 48.
+ * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register (spu_tlb_invalidate()).
+ * Then issue a PPE sync instruction (mb() here).
+ */
+ spu_tlb_invalidate(spu);
+ mb();
+}
+
+static inline void handle_pending_interrupts(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Save, Step 18:
+ * Handle any pending interrupts from this SPU
+ * here. This is OS or hypervisor specific. One
+ * option is to re-enable interrupts to handle any
+ * pending interrupts, with the interrupt handlers
+ * recognizing the software Context Switch Pending
+ * flag, to ensure the SPU execution or MFC command
+ * queue is not restarted. TBD: not implemented.
+ */
+}
+
+static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	int n;
+
+	/* Save, Step 19:
+	 *    Nothing to do when MFC_Cntl reports empty queues;
+	 *    otherwise save the 8 puq and 16 spuq entries.
+	 */
+	if (in_be64(&regs->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY)
+		return;
+	for (n = 0; n < 8; n++) {
+		csa->priv2.puq[n].mfc_cq_data0_RW =
+			in_be64(&regs->puq[n].mfc_cq_data0_RW);
+		csa->priv2.puq[n].mfc_cq_data1_RW =
+			in_be64(&regs->puq[n].mfc_cq_data1_RW);
+		csa->priv2.puq[n].mfc_cq_data2_RW =
+			in_be64(&regs->puq[n].mfc_cq_data2_RW);
+		csa->priv2.puq[n].mfc_cq_data3_RW =
+			in_be64(&regs->puq[n].mfc_cq_data3_RW);
+	}
+	for (n = 0; n < 16; n++) {
+		csa->priv2.spuq[n].mfc_cq_data0_RW =
+			in_be64(&regs->spuq[n].mfc_cq_data0_RW);
+		csa->priv2.spuq[n].mfc_cq_data1_RW =
+			in_be64(&regs->spuq[n].mfc_cq_data1_RW);
+		csa->priv2.spuq[n].mfc_cq_data2_RW =
+			in_be64(&regs->spuq[n].mfc_cq_data2_RW);
+		csa->priv2.spuq[n].mfc_cq_data3_RW =
+			in_be64(&regs->spuq[n].mfc_cq_data3_RW);
+	}
+}
+
+static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 20:
+	 *    Read the PPU_QueryMask register
+	 *    (dma_querymask_RW in the problem
+	 *    state area) and keep a copy of it
+	 *    in the CSA.
+	 */
+	csa->prob.dma_querymask_RW = in_be32(&spu->problem->dma_querymask_RW);
+}
+
+static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 21:
+	 *    Read the PPU_QueryType register
+	 *    (dma_querytype_RW in the problem
+	 *    state area) and keep a copy of it
+	 *    in the CSA.
+	 */
+	csa->prob.dma_querytype_RW = in_be32(&spu->problem->dma_querytype_RW);
+}
+
+static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
+{
+	/* Save the Prxy_TagStatus register in the CSA.
+	 *
+	 * Restoring dma_tagstatus_R is not needed.
+	 * The copy in the CSA is nevertheless
+	 * accessed through backing_ops, so the
+	 * register must still be saved here.
+	 */
+
+	csa->prob.dma_tagstatus_R = in_be32(&spu->problem->dma_tagstatus_R);
+}
+
+static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 22:
+	 *    Save the MFC_CSR_TSQ register.  The
+	 *    value read from the priv2 area goes to
+	 *    csa->priv2.spu_tag_status_query_RW in
+	 *    the CSA.
+	 */
+	csa->priv2.spu_tag_status_query_RW =
+		in_be64(&spu->priv2->spu_tag_status_query_RW);
+}
+
+static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 23:
+	 *    Save the MFC_CSR_CMD1 and MFC_CSR_CMD2
+	 *    registers (spu_cmd_buf1_RW and
+	 *    spu_cmd_buf2_RW) in the CSA, in that
+	 *    order.
+	 */
+	csa->priv2.spu_cmd_buf1_RW = in_be64(&spu->priv2->spu_cmd_buf1_RW);
+	csa->priv2.spu_cmd_buf2_RW = in_be64(&spu->priv2->spu_cmd_buf2_RW);
+}
+
+static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 24:
+	 *    Read the MFC_CSR_ATO register from the
+	 *    priv2 area and save it in the CSA.
+	 */
+
+	csa->priv2.spu_atomic_status_RW =
+		in_be64(&spu->priv2->spu_atomic_status_RW);
+}
+
+static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 25:
+ * Save the MFC_TCLASS_ID register, as returned by
+ * spu_mfc_tclass_id_get(), in the CSA.
+ */
+ csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
+}
+
+static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 26:
+ * Restore, Step 23.
+ * Write the MFC_TCLASS_ID register with
+ * the value 0x10000000, followed by eieio().
+ */
+ spu_mfc_tclass_id_set(spu, 0x10000000);
+ eieio();
+}
+
+static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 27:
+	 * Restore, Step 14.
+	 *    Request a purge of the MFC queue by
+	 *    writing MFC_CNTL[Pc]=1, together with
+	 *    the suspend mask bits.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW,
+		 MFC_CNTL_PURGE_DMA_REQUEST |
+		 MFC_CNTL_SUSPEND_MASK);
+	eieio();
+}
+
+static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 28:
+	 *    Keep polling MFC_CNTL until the [Ps]
+	 *    field reads '11', which means that
+	 *    the queue purge has completed.
+	 */
+
+	POLL_WHILE_FALSE((in_be64(&spu->priv2->mfc_control_RW) &
+			  MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
+			 MFC_CNTL_PURGE_DMA_COMPLETE);
+}
+
+static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 30:
+ * Restore, Step 18:
+ * Write MFC_SR1 with MFC_SR1[D=0,S=1] and
+ * MFC_SR1[TL,R,Pr,T] set correctly for the
+ * OS specific environment.
+ *
+ * Implementation note: The SPU-side code
+ * for save/restore is privileged, so the
+ * MFC_SR1[Pr] bit is not set.
+ * Bits written: MASTER_RUN_CONTROL, RELOCATE, BUS_TLBIE.
+ */
+ spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+ MFC_STATE1_RELOCATE_MASK |
+ MFC_STATE1_BUS_TLBIE_MASK));
+}
+
+static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 31:
+	 *    Read the SPU_NPC register from the
+	 *    problem-state area and record it in
+	 *    the CSA.
+	 */
+	csa->prob.spu_npc_RW = in_be32(&spu->problem->spu_npc_RW);
+}
+
+static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 32:
+	 *    Read the SPU_PrivCntl register from
+	 *    the priv2 area and record it in the
+	 *    CSA.
+	 */
+	csa->priv2.spu_privcntl_RW = in_be64(&spu->priv2->spu_privcntl_RW);
+}
+
+static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 33 / Restore, Step 16:
+	 * Clear the SPU_PrivCntl[S,Le,A] fields by writing the
+	 * whole register as zero.
+	 */
+	out_be64(&spu->priv2->spu_privcntl_RW, 0UL);
+	eieio();
+}
+
+static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 34:
+	 * Copy the SPU_LSLR register into the CSA.
+	 */
+	csa->priv2.spu_lslr_RW = in_be64(&spu->priv2->spu_lslr_RW);
+}
+
+static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 35 / Restore, Step 17:
+	 * Reset SPU_LSLR by writing LS_ADDR_MASK to it.
+	 */
+	out_be64(&spu->priv2->spu_lslr_RW, LS_ADDR_MASK);
+	eieio();
+}
+
+static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 36:
+	 * Copy the SPU_Cfg register into the CSA.
+	 */
+	csa->priv2.spu_cfg_RW = in_be64(&spu->priv2->spu_cfg_RW);
+}
+
+static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 37:
+ * Save PM_Trace_Tag_Wait_Mask in the CSA.
+ * Not performed by this implementation: the body is
+ * intentionally empty and neither argument is used.
+ */
+}
+
+static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 38:
+ * Save the RA_GROUP_ID register and the
+ * RA_ENABLE register in the CSA
+ * (csa->priv1.resource_allocation_groupID_RW and
+ * csa->priv1.resource_allocation_enable_RW).
+ */
+ csa->priv1.resource_allocation_groupID_RW =
+ spu_resource_allocation_groupID_get(spu);
+ csa->priv1.resource_allocation_enable_RW =
+ spu_resource_allocation_enable_get(spu);
+}
+
+static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 39:
+	 * Copy the MB_Stat register into the CSA.
+	 */
+	csa->prob.mb_stat_R = in_be32(&spu->problem->mb_stat_R);
+}
+
+static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 40:
+	 * Copy the PPU_MB register into the CSA.
+	 */
+	csa->prob.pu_mb_R = in_be32(&spu->problem->pu_mb_R);
+}
+
+static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 41:
+	 * Copy the PPUINT_MB register into the CSA.
+	 */
+	csa->priv2.puint_mb_R = in_be64(&spu->priv2->puint_mb_R);
+}
+
+static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+ int i;
+
+ /* Save, Step 42:
+ * Save channels 1, 0, 3, 4, 24, 25 and 27 into the CSA.
+ * A channel is selected by writing its index to
+ * spu_chnlcntptr_RW; its data and count are then accessed
+ * through spu_chnldata_RW and spu_chnlcnt_RW.
+ */
+
+ /* Save CH 1, without channel count */
+ out_be64(&priv2->spu_chnlcntptr_RW, 1);
+ csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
+
+ /* Save data and count of the following CH: [0,3,4,24,25,27],
+ * then write zero to both data and count of each channel.
+ */
+ for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
+ csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
+ out_be64(&priv2->spu_chnldata_RW, 0UL);
+ out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+ eieio();
+ }
+}
+
+static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int slot;
+
+	/* Save, Step 43:
+	 * Save the SPU Read Mailbox channel (channel 29): first its
+	 * count, then four data reads, and finally zero the count.
+	 */
+	out_be64(&regs->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	csa->spu_chnlcnt_RW[29] = in_be64(&regs->spu_chnlcnt_RW);
+	for (slot = 0; slot < 4; slot++)
+		csa->spu_mailbox_data[slot] = in_be64(&regs->spu_chnldata_RW);
+	out_be64(&regs->spu_chnlcnt_RW, 0UL);
+	eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Save, Step 44:
+	 * Save the MFC_CMD channel: select channel 21 and store
+	 * its count in the CSA.
+	 */
+	out_be64(&regs->spu_chnlcntptr_RW, 21UL);
+	eieio();
+	csa->spu_chnlcnt_RW[21] = in_be64(&regs->spu_chnlcnt_RW);
+	eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+	/* Channel index paired with the count written to it. */
+	static const struct {
+		u64 channel;
+		u64 count;
+	} resets[] = {
+		{ 21UL, 16UL },
+		{ 23UL, 1UL },
+		{ 28UL, 1UL },
+		{ 30UL, 1UL },
+	};
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Save, Step 45:
+	 * Reset the following CH: [21, 23, 28, 30] by selecting each
+	 * one and writing its count.
+	 */
+	for (n = 0; n < ARRAY_SIZE(resets); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, resets[n].channel);
+		eieio();
+		out_be64(&regs->spu_chnlcnt_RW, resets[n].count);
+		eieio();
+	}
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 46 / Restore, Step 25:
+	 * Resume queue processing by writing MFC_CNTL[Sc]=0.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+ unsigned int *code, int code_size)
+{
+ /* Save, Step 47:
+ * Restore, Step 30.
+ * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+ * register, then initialize SLB_VSID and SLB_ESID
+ * to provide access to SPU context save code and
+ * LSCSA.
+ *
+ * This implementation places both the context
+ * switch code and LSCSA in kernel address space.
+ *
+ * Further this implementation assumes that the
+ * MFC_SR1[R]=1 (in other words, assume that
+ * translation is desired by OS environment).
+ *
+ * code and code_size are handed, together with csa->lscsa,
+ * to spu_setup_kernel_slbs() after all SLBs were invalidated.
+ */
+ spu_invalidate_slbs(spu);
+ spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 48:
+ * Restore, Step 23.
+ * Change the software context switch pending flag
+ * to context switch active. This implementation does
+ * not use a switch active flag.
+ *
+ * Now that we have saved the mfc in the csa, we can add in the
+ * restart command if an exception occurred.
+ *
+ * Concretely: if SPU_CONTEXT_FAULT_PENDING is set in spu->flags,
+ * MFC_CNTL_RESTART_DMA_COMMAND is ORed into the saved MFC_CNTL
+ * value; SPU_CONTEXT_SWITCH_PENDING is then cleared, followed
+ * by a full memory barrier.
+ */
+ if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+ csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+ clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+ mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+ CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+ /* Save, Step 49:
+ * Restore, Step 22:
+ * Reset and then enable interrupts, as
+ * needed by OS.
+ *
+ * This implementation enables only class1
+ * (translation) interrupts.
+ *
+ * The status of all three classes is cleared first, then the
+ * masks are written: class 0 and class 2 get an empty mask,
+ * class 1 gets the segment-fault and storage-fault enables.
+ * The whole sequence runs under spu->register_lock, taken with
+ * spin_lock_irq().
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, class1_mask);
+ spu_int_mask_set(spu, 2, 0ul);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Issue MFC DMA commands through the problem-state MFC registers.
+ *
+ * @spu:       SPU whose problem-state registers are written
+ * @ea:        effective address, written to mfc_ea_W
+ * @ls_offset: local-store offset, written to mfc_lsa_W
+ * @size:      total byte count; split into pieces of at most
+ *             MFC_MAX_DMA_SIZE bytes, one command per piece
+ * @tag:       value stored in the command's mfc_tag field
+ * @rclass:    value stored in the command's mfc_rclassid field
+ * @cmd:       value stored in the command's mfc_cmd field
+ *
+ * Always returns 0.
+ */
+static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
+			       unsigned int ls_offset, unsigned int size,
+			       unsigned int tag, unsigned int rclass,
+			       unsigned int cmd)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	union mfc_tag_size_class_cmd command;
+	unsigned int transfer_size;
+	/*
+	 * No 'volatile' needed: the value comes from in_be32(), which is
+	 * already an MMIO accessor, and is only ever used locally.
+	 */
+	unsigned int status;
+
+	while (size > 0) {
+		transfer_size =
+		    (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size;
+		command.u.mfc_size = transfer_size;
+		command.u.mfc_tag = tag;
+		command.u.mfc_rclassid = rclass;
+		command.u.mfc_cmd = cmd;
+
+		/*
+		 * Write LSA, EA and the command word, then read the status
+		 * back.  The whole register sequence is repeated for as long
+		 * as either of the two low status bits is set; when bit 0x2
+		 * is set we relax the CPU before retrying.
+		 */
+		do {
+			out_be32(&prob->mfc_lsa_W, ls_offset);
+			out_be64(&prob->mfc_ea_W, ea);
+			out_be64(&prob->mfc_union_W.all64, command.all64);
+			status =
+			    in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+			if (unlikely(status & 0x2))
+				cpu_relax();
+		} while (status & 0x3);
+
+		/* Advance to the next piece. */
+		size -= transfer_size;
+		ea += transfer_size;
+		ls_offset += transfer_size;
+	}
+	return 0;
+}
+
+static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long csa_ls = (unsigned long)&csa->lscsa->ls[0];
+
+	/* Save, Step 50:
+	 * Issue a DMA command (MFC_PUT_CMD, tag 0, class 0) that
+	 * copies the first 16K bytes of local storage, starting at
+	 * local-store offset 0, to the CSA.
+	 */
+	send_mfc_dma(spu, csa_ls, 0x0, 16384, 0, 0, MFC_PUT_CMD);
+}
+
+static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 51 / Restore, Step 31:
+	 * Write SPU_NPC[IE]=0 and set SPU_NPC[LSA] to the entry
+	 * point of the context save code in local storage.
+	 *
+	 * The SPU-side save/restore programs used by this
+	 * implementation have their entry points at LSA 0, so the
+	 * register is simply written as 0.
+	 */
+	out_be32(&spu->problem->spu_npc_RW, 0);
+	eieio();
+}
+
+static inline void set_signot1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u64 lscsa_ea = (u64)(unsigned long)csa->lscsa;
+
+	/* Save, Step 52:
+	 * Restore, Step 32:
+	 * Write SPU_Sig_Notify_1 register with upper 32-bits
+	 * of the CSA.LSCSA effective address.
+	 *
+	 * The upper word is taken with a shift instead of by
+	 * overlaying a u32 pair on the u64, so the result does not
+	 * depend on host byte order.
+	 */
+	out_be32(&prob->signal_notify1, (u32)(lscsa_ea >> 32));
+	eieio();
+}
+
+static inline void set_signot2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u64 lscsa_ea = (u64)(unsigned long)csa->lscsa;
+
+	/* Save, Step 53:
+	 * Restore, Step 33:
+	 * Write SPU_Sig_Notify_2 register with lower 32-bits
+	 * of the CSA.LSCSA effective address.
+	 *
+	 * The lower word is taken with a mask instead of by
+	 * overlaying a u32 pair on the u64, so the result does not
+	 * depend on host byte order.
+	 */
+	out_be32(&prob->signal_notify2, (u32)(lscsa_ea & 0xffffffffUL));
+	eieio();
+}
+
+static inline void send_save_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long code_ea = (unsigned long)&spu_save_code[0];
+
+	/* Save, Step 54:
+	 * Issue a DMA command (MFC_GETFS_CMD, tag 0, class 0) that
+	 * copies the context save code to local-store offset 0 and
+	 * starts the SPU.
+	 */
+	send_mfc_dma(spu, code_ea, 0x0, sizeof(spu_save_code), 0, 0,
+		     MFC_GETFS_CMD);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	/* Save, Step 55 / Restore, Step 38:
+	 * Enable Tag Group 0 in PPU_QueryMask, then issue an
+	 * eieio instruction.
+	 */
+	out_be32(&spu->problem->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
+	eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mask = MFC_TAGID_TO_TAGMASK(0);
+ unsigned long flags;
+
+ /* Save, Step 56:
+ * Restore, Step 39.
+ * Restore, Step 46.
+ * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+ * or write PPU_QueryType[TS]=01 and wait for Tag Group
+ * Complete Interrupt. Write INT_Stat_Class0 or
+ * INT_Stat_Class2 with value of 'handled'.
+ *
+ * This implementation polls, and afterwards clears the class 0
+ * and class 2 interrupt status with local interrupts disabled.
+ */
+ POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+ local_irq_save(flags);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *regs = spu->problem;
+	unsigned long irq_state;
+
+	/* Save, Step 57 / Restore, Step 40:
+	 * Poll until SPU_Status[R]=0 (the alternative of waiting
+	 * for an SPU class 0 or class 2 interrupt is not used),
+	 * then write INT_Stat_class0 and INT_Stat_class2 with the
+	 * value of 'handled'.
+	 */
+	POLL_WHILE_TRUE(in_be32(&regs->spu_status_R) & SPU_STATUS_RUNNING);
+
+	local_irq_save(irq_state);
+	spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+	spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+	local_irq_restore(irq_state);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+	/* Status expected after a successful save: stopped by a stop
+	 * instruction carrying the SPU_SAVE_COMPLETE code.
+	 */
+	u32 expected = (SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		       SPU_STATUS_STOPPED_BY_STOP;
+
+	/* Save, Step 54:
+	 * If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 * context save succeeded, otherwise context save
+	 * failed.
+	 *
+	 * Returns 0 on success and 1 on failure.
+	 */
+	return in_be32(&spu->problem->spu_status_R) != expected;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 4:
+ * If required, notify the "using application" that
+ * the SPU task has been terminated. TBD.
+ *
+ * Not implemented: the body is empty.
+ */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+					     struct spu *spu)
+{
+	u64 request = MFC_CNTL_SUSPEND_DMA_QUEUE | MFC_CNTL_DECREMENTER_HALTED;
+
+	/* Restore, Step 7:
+	 * Suspend the queue and halt the decrementer by writing
+	 * MFC_Cntl[Dh,Sc,Sm]='1','1','0'.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, request);
+	eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+					     struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Restore, Step 8 / Restore, Step 47:
+	 * Keep re-reading MFC_CNTL until its suspend status field
+	 * MFC_CNTL[Ss] returns 11 (suspend complete).
+	 */
+	POLL_WHILE_FALSE((in_be64(&regs->mfc_control_RW) &
+			  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+			 MFC_CNTL_SUSPEND_COMPLETE);
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 9:
+ * If SPU_Status[R]=1, stop SPU execution
+ * and wait for stop to complete.
+ *
+ * Returns 1 if SPU_Status[R]=1 on entry.
+ * 0 otherwise
+ *
+ * While running, three cases are handled in turn, each
+ * re-reading SPU_Status:
+ * - isolated exit status set: just wait for R to clear;
+ * - isolated load status or isolated state set: write STOP,
+ * wait for R to clear, then write 0x2 to SPU_RunCntl and
+ * wait again;
+ * - waiting for channel: write STOP and wait for R to clear.
+ *
+ * NOTE(review): the literal 0x2 has no symbolic name here;
+ * presumably an "isolate" run-control request - confirm.
+ */
+ if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_EXIT_STATUS) {
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if ((in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_LOAD_STATUS)
+ || (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_STATE)) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ out_be32(&prob->spu_runcntl_RW, 0x2);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_WAITING_FOR_CHANNEL) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 10:
+ * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1,
+ * release SPU from isolate state.
+ *
+ * Both cases first set MFC_SR1 to the master run control mask
+ * only. With the isolated exit status set, SPU_RunCntl is then
+ * written as RUNNABLE; with the isolated load status or the
+ * isolated state set, it is written as 0x2. Each write is
+ * followed by a wait for SPU_Status[R] to clear.
+ *
+ * NOTE(review): the literal 0x2 has no symbolic name here;
+ * presumably an "isolate" run-control request - confirm.
+ */
+ if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_EXIT_STATUS) {
+ spu_mfc_sr1_set(spu,
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ eieio();
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if ((in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_LOAD_STATUS)
+ || (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_STATE)) {
+ spu_mfc_sr1_set(spu,
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ eieio();
+ out_be32(&prob->spu_runcntl_RW, 0x2);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ }
+}
+
+static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	static const u64 channels[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Restore, Step 20:
+	 * Zero the data of channel 1, then zero both data and count
+	 * of channels [0,3,4,24,25,27].
+	 */
+
+	/* Channel 1: data only. */
+	out_be64(&regs->spu_chnlcntptr_RW, 1);
+	out_be64(&regs->spu_chnldata_RW, 0UL);
+
+	/* Remaining channels: data and count. */
+	for (n = 0; n < ARRAY_SIZE(channels); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, channels[n]);
+		eieio();
+		out_be64(&regs->spu_chnldata_RW, 0UL);
+		out_be64(&regs->spu_chnlcnt_RW, 0UL);
+		eieio();
+	}
+}
+
+static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	/* Channel index paired with the count written to it. */
+	static const struct {
+		u64 channel;
+		u64 count;
+	} resets[] = {
+		{ 21UL, 16UL },
+		{ 23UL, 1UL },
+		{ 28UL, 1UL },
+		{ 29UL, 0UL },
+		{ 30UL, 1UL },
+	};
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Restore, Step 21:
+	 * Reset the following CH: [21, 23, 28, 29, 30] by selecting
+	 * each one and writing its count.
+	 */
+	for (n = 0; n < ARRAY_SIZE(resets); n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, resets[n].channel);
+		eieio();
+		out_be64(&regs->spu_chnlcnt_RW, resets[n].count);
+		eieio();
+	}
+}
+
+static inline void setup_spu_status_part1(struct spu_state *csa,
+					  struct spu *spu)
+{
+	/*
+	 * Saved SPU_Status bit combinations, most specific first.  The
+	 * first entry whose bits are all set in the saved status is
+	 * used; later entries are not looked at.
+	 */
+	static const struct {
+		u32 bits;	/* status bits that must all be set */
+		u32 stopped;	/* value for stopped_status.slot[0] */
+		int with_code;	/* also record the stop code in slot[1] */
+	} cases[] = {
+		/* [P,I]: illegal instruction, stop and signal, 'br -4' */
+		{ SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_INVALID_INSTR,
+		  SPU_STOPPED_STATUS_P_I, 1 },
+		/* [P,H]: halt conditional, stop and signal, 'br -4' */
+		{ SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP,
+		  SPU_STOPPED_STATUS_P_H, 1 },
+		/* [S,P]: stop and signal, 'br -4' */
+		{ SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP,
+		  SPU_STOPPED_STATUS_S_P, 1 },
+		/* [S,I]: illegal instruction, 'br -4' */
+		{ SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR,
+		  SPU_STOPPED_STATUS_S_I, 1 },
+		/* [P]: stop and signal, 'br -4' */
+		{ SPU_STATUS_STOPPED_BY_STOP, SPU_STOPPED_STATUS_P, 1 },
+		/* [H]: halt conditional, 'br -4' */
+		{ SPU_STATUS_STOPPED_BY_HALT, SPU_STOPPED_STATUS_H, 0 },
+		/* [S]: two nop instructions */
+		{ SPU_STATUS_SINGLE_STEP, SPU_STOPPED_STATUS_S, 0 },
+		/* [I]: illegal instruction, 'br -4' */
+		{ SPU_STATUS_INVALID_INSTR, SPU_STOPPED_STATUS_I, 0 },
+	};
+	u32 stop_code;
+	unsigned int n;
+
+	/* Restore, Step 27:
+	 * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct
+	 * instruction sequence to the end of the SPU based restore
+	 * code (after the "context restored" stop and signal) to
+	 * restore the correct SPU status.
+	 *
+	 * NOTE: Rather than modifying the SPU executable, a
+	 * 'stopped_status' field in the LSCSA is filled in.  The
+	 * SPU-side restore reads this field and takes the
+	 * appropriate action when exiting.
+	 *
+	 * If none of the combinations matches, the LSCSA is left
+	 * untouched.
+	 */
+	stop_code = (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+
+	for (n = 0; n < ARRAY_SIZE(cases); n++) {
+		if ((csa->prob.spu_status_R & cases[n].bits) != cases[n].bits)
+			continue;
+
+		csa->lscsa->stopped_status.slot[0] = cases[n].stopped;
+		if (cases[n].with_code)
+			csa->lscsa->stopped_status.slot[1] = stop_code;
+		break;
+	}
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+					  struct spu *spu)
+{
+	const u32 any_state = SPU_STATUS_INVALID_INSTR |
+			      SPU_STATUS_SINGLE_STEP |
+			      SPU_STATUS_STOPPED_BY_HALT |
+			      SPU_STATUS_STOPPED_BY_STOP |
+			      SPU_STATUS_RUNNING;
+
+	/* Restore, Step 28:
+	 * If the CSA.SPU_Status[I,S,H,P,R]=0 then add a 'br *'
+	 * instruction to the end of the SPU based restore code.
+	 *
+	 * NOTE: Rather than modifying the SPU executable, a
+	 * 'stopped_status' field in the LSCSA is filled in.  The
+	 * SPU-side restore reads this field and takes the
+	 * appropriate action when exiting.
+	 */
+	if (csa->prob.spu_status_R & any_state)
+		return;
+
+	csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 29:
+ * Restore the RA_GROUP_ID register and the
+ * RA_ENABLE register from the CSA
+ * (csa->priv1.resource_allocation_groupID_RW and
+ * csa->priv1.resource_allocation_enable_RW).
+ */
+ spu_resource_allocation_groupID_set(spu,
+ csa->priv1.resource_allocation_groupID_RW);
+ spu_resource_allocation_enable_set(spu,
+ csa->priv1.resource_allocation_enable_RW);
+}
+
+static inline void send_restore_code(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long code_ea = (unsigned long)&spu_restore_code[0];
+
+	/* Restore, Step 37:
+	 * Issue an MFC DMA command (MFC_GETFS_CMD, tag 0, class 0)
+	 * that copies the context restore code to local-store
+	 * offset 0.
+	 */
+	send_mfc_dma(spu, code_ea, 0x0, sizeof(spu_restore_code), 0, 0,
+		     MFC_GETFS_CMD);
+}
+
+static inline void setup_decr(struct spu_state *csa, struct spu *spu)
+{
+	cycles_t elapsed;
+
+	/* Restore, Step 34:
+	 * If CSA.MFC_CNTL[Ds]=1 (decrementer was running) then
+	 * adjust the decrementer, set the decrementer running
+	 * status in the LSCSA, and set the decrementer "wrapped"
+	 * status in the LSCSA.  Otherwise the LSCSA decrementer
+	 * status is simply cleared.
+	 */
+	if (!(csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)) {
+		csa->lscsa->decr_status.slot[0] = 0;
+		return;
+	}
+
+	/* Cycles that passed since csa->suspend_time was recorded. */
+	elapsed = get_cycles() - csa->suspend_time;
+
+	csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+	if (csa->lscsa->decr.slot[0] < elapsed)
+		csa->lscsa->decr_status.slot[0] |= SPU_DECR_STATUS_WRAPPED;
+
+	csa->lscsa->decr.slot[0] -= elapsed;
+}
+
+static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 35:
+ * Copy the CSA.PU_MB data into the LSCSA
+ * (slot 0 of lscsa->ppu_mb). spu is not used.
+ */
+ csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R;
+}
+
+static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 36:
+ * Copy the CSA.PUINT_MB data into the LSCSA
+ * (slot 0 of lscsa->ppuint_mb). spu is not used.
+ */
+ csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R;
+}
+
+static inline int check_restore_status(struct spu_state *csa, struct spu *spu)
+{
+	/* Status expected after a successful restore: stopped by a stop
+	 * instruction carrying the SPU_RESTORE_COMPLETE code.
+	 */
+	u32 expected = (SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		       SPU_STATUS_STOPPED_BY_STOP;
+
+	/* Restore, Step 40:
+	 * If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+	 * context restore succeeded, otherwise context restore
+	 * failed.
+	 *
+	 * Returns 0 on success and 1 on failure.
+	 */
+	return in_be32(&spu->problem->spu_status_R) != expected;
+}
+
+static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 41:
+	 * Write the saved SPU_PrivCntl value from the CSA back to
+	 * the register.
+	 */
+	out_be64(&spu->priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW);
+	eieio();
+}
+
+static inline void restore_status_part1(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *regs = spu->problem;
+	const u32 stopped_bits = SPU_STATUS_INVALID_INSTR |
+				 SPU_STATUS_SINGLE_STEP |
+				 SPU_STATUS_STOPPED_BY_HALT |
+				 SPU_STATUS_STOPPED_BY_STOP;
+
+	/* Restore, Step 42:
+	 * If any CSA.SPU_Status[I,S,H,P]=1, then restore the error
+	 * or single step state: make the SPU runnable and wait
+	 * until SPU_Status[R] is clear again.
+	 */
+	if (!(csa->prob.spu_status_R & stopped_bits))
+		return;
+
+	out_be32(&regs->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+	eieio();
+	POLL_WHILE_TRUE(in_be32(&regs->spu_status_R) & SPU_STATUS_RUNNING);
+}
+
+static inline void restore_status_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *regs = spu->problem;
+	const u32 any_state = SPU_STATUS_INVALID_INSTR |
+			      SPU_STATUS_SINGLE_STEP |
+			      SPU_STATUS_STOPPED_BY_HALT |
+			      SPU_STATUS_STOPPED_BY_STOP |
+			      SPU_STATUS_RUNNING;
+
+	/* Restore, Step 43:
+	 * If all CSA.SPU_Status[I,S,H,P,R]=0 then write
+	 * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1,
+	 * then write '00' to SPU_RunCntl[R0R1] and wait
+	 * for SPU_Status[R]=0.
+	 */
+	if (csa->prob.spu_status_R & any_state)
+		return;
+
+	/* Start the SPU and wait until it reports running ... */
+	out_be32(&regs->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+	eieio();
+	POLL_WHILE_FALSE(in_be32(&regs->spu_status_R) & SPU_STATUS_RUNNING);
+
+	/* ... then stop it and wait until it no longer does. */
+	out_be32(&regs->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	eieio();
+	POLL_WHILE_TRUE(in_be32(&regs->spu_status_R) & SPU_STATUS_RUNNING);
+}
+
+static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+	unsigned long csa_ls = (unsigned long)&csa->lscsa->ls[0];
+
+	/* Restore, Step 44:
+	 * Issue a DMA command (MFC_GET_CMD, tag 0, class 0) that
+	 * restores the first 16kb of local storage, starting at
+	 * local-store offset 0, from the CSA.
+	 */
+	send_mfc_dma(spu, csa_ls, 0x0, 16384, 0, 0, MFC_GET_CMD);
+}
+
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 47:
+	 * Suspend the queue by writing MFC_Cntl[Sc,Sm]='1','0'.
+	 */
+	out_be64(&spu->priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+	eieio();
+}
+
+static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 49:
+ * Write INT_MASK_class0 with value of 0.
+ * Write INT_MASK_class1 with value of 0.
+ * Write INT_MASK_class2 with value of 0.
+ * Write INT_STAT_class0 with value of -1.
+ * Write INT_STAT_class1 with value of -1.
+ * Write INT_STAT_class2 with value of -1.
+ *
+ * The status writes use the per-class CLASSn_INTR_MASK
+ * constants. The whole sequence runs under
+ * spu->register_lock, taken with spin_lock_irq().
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, 0ul);
+ spu_int_mask_set(spu, 2, 0ul);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int i;
+
+ /* Restore, Step 50:
+ * If MFC_Cntl[Se]!=0 then restore
+ * MFC command queues.
+ *
+ * As coded, the 8 puq[] and 16 spuq[] entries (four data
+ * words each) are written back when the
+ * MFC_CNTL_DMA_QUEUES_EMPTY_MASK bits of the saved MFC_CNTL
+ * value are all zero.
+ * NOTE(review): that reads as the opposite of "Se != 0"
+ * above - confirm the step wording against the architecture
+ * document.
+ * The trailing eieio() is issued whether or not the queues
+ * were rewritten.
+ */
+ if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) {
+ for (i = 0; i < 8; i++) {
+ out_be64(&priv2->puq[i].mfc_cq_data0_RW,
+ csa->priv2.puq[i].mfc_cq_data0_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data1_RW,
+ csa->priv2.puq[i].mfc_cq_data1_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data2_RW,
+ csa->priv2.puq[i].mfc_cq_data2_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data3_RW,
+ csa->priv2.puq[i].mfc_cq_data3_RW);
+ }
+ for (i = 0; i < 16; i++) {
+ out_be64(&priv2->spuq[i].mfc_cq_data0_RW,
+ csa->priv2.spuq[i].mfc_cq_data0_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data1_RW,
+ csa->priv2.spuq[i].mfc_cq_data1_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data2_RW,
+ csa->priv2.spuq[i].mfc_cq_data2_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data3_RW,
+ csa->priv2.spuq[i].mfc_cq_data3_RW);
+ }
+ }
+ eieio();
+}
+
+static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 51:
+	 * Write the saved PPU_QueryMask value from the CSA back to
+	 * the register.
+	 */
+	out_be32(&spu->problem->dma_querymask_RW, csa->prob.dma_querymask_RW);
+	eieio();
+}
+
+static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 52:
+	 * Write the saved PPU_QueryType value from the CSA back to
+	 * the register.
+	 */
+	out_be32(&spu->problem->dma_querytype_RW, csa->prob.dma_querytype_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Restore, Step 53:
+	 * Write the saved MFC_CSR_TSQ value from the CSA back to
+	 * the register.
+	 */
+	out_be64(&regs->spu_tag_status_query_RW,
+		 csa->priv2.spu_tag_status_query_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	/* Restore, Step 54:
+	 * Write the saved MFC_CSR_CMD1 and MFC_CSR_CMD2 values from
+	 * the CSA back to their registers, CMD1 first.
+	 */
+	out_be64(&regs->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW);
+	out_be64(&regs->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW);
+	eieio();
+}
+
+static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 55:
+	 * Write the saved MFC_CSR_ATO value from the CSA back to
+	 * the register.
+	 */
+	out_be64(&spu->priv2->spu_atomic_status_RW,
+		 csa->priv2.spu_atomic_status_RW);
+}
+
+static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 56:
+ * Restore the MFC_TCLASS_ID register from CSA
+ * (csa->priv1.mfc_tclass_id_RW), followed by an eieio().
+ */
+ spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW);
+ eieio();
+}
+
+static inline void set_llr_event(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 57:
+	 * Set the Lock Line Reservation Lost Event by:
+	 * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1.
+	 * 2. If CSA.SPU_Channel_0_Count=0 and
+	 *    CSA.SPU_Wr_Event_Mask[Lr]=1 and
+	 *    CSA.SPU_Event_Status[Lr]=0 then set
+	 *    CSA.SPU_Event_Status_Count=1.
+	 *
+	 * The count is adjusted first, while the saved channel 0
+	 * data still holds its value from before the event bit is
+	 * ORed in.
+	 */
+	if (csa->spu_chnlcnt_RW[0] == 0 &&
+	    !(csa->spu_chnldata_RW[0] & MFC_LLR_LOST_EVENT) &&
+	    (csa->spu_chnldata_RW[1] & MFC_LLR_LOST_EVENT))
+		csa->spu_chnlcnt_RW[0] = 1;
+
+	csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT;
+}
+
+static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
+{
+	/* Event bit handled by this step (Tm, per the step text). */
+	const u64 tm_event = 0x20;
+
+	/* Restore, Step 58:
+	 * If the status of the CSA software decrementer
+	 * "wrapped" flag is set, OR in a '1' to
+	 * CSA.SPU_Event_Status[Tm].
+	 *
+	 * Before that, the saved channel 0 count is set to 1 when it
+	 * was 0, the bit is set in the saved channel 1 data and it
+	 * was not yet set in the saved channel 0 data.
+	 */
+	if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
+		if (csa->spu_chnlcnt_RW[0] == 0 &&
+		    (csa->spu_chnldata_RW[1] & tm_event) &&
+		    !(csa->spu_chnldata_RW[0] & tm_event))
+			csa->spu_chnlcnt_RW[0] = 1;
+
+		csa->spu_chnldata_RW[0] |= tm_event;
+	}
+}
+
+static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+	static const u64 channels[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int n;
+
+	/* Restore, Step 59:
+	 * Restore the following CH: [0,3,4,24,25,27].  For each one
+	 * the saved data is written before the saved count.
+	 */
+	for (n = 0; n < ARRAY_SIZE(channels); n++) {
+		const u64 ch = channels[n];
+
+		out_be64(&regs->spu_chnlcntptr_RW, ch);
+		eieio();
+		out_be64(&regs->spu_chnldata_RW, csa->spu_chnldata_RW[ch]);
+		out_be64(&regs->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[ch]);
+		eieio();
+	}
+}
+
+static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	const u64 channels[3] = { 9UL, 21UL, 23UL };
+	u64 counts[3];
+	unsigned int n;
+
+	/* Restore, Step 60:
+	 * Restore the following CH: [9,21,23].  Channels 9 and 23
+	 * get a count of 1; channel 21 gets the count saved in the
+	 * CSA.
+	 */
+	counts[0] = 1UL;
+	counts[1] = csa->spu_chnlcnt_RW[21];
+	counts[2] = 1UL;
+
+	for (n = 0; n < 3; n++) {
+		out_be64(&regs->spu_chnlcntptr_RW, channels[n]);
+		eieio();
+		out_be64(&regs->spu_chnlcnt_RW, counts[n]);
+		eieio();
+	}
+}
+
+static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 61:
+	 * Write the saved SPU_LSLR value from the CSA back to the
+	 * register.
+	 */
+	out_be64(&spu->priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW);
+	eieio();
+}
+
+static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 62:
+	 * Write the saved SPU_Cfg value from the CSA back to the
+	 * register.
+	 */
+	out_be64(&spu->priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
+	eieio();
+}
+
+static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 63:
+ * Restore PM_Trace_Tag_Wait_Mask from CSA.
+ * Not performed by this implementation: the body is
+ * intentionally empty and neither argument is used.
+ */
+}
+
+static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 64:
+	 * Write the saved SPU_NPC value from the CSA back to the
+	 * register.
+	 */
+	out_be32(&spu->problem->spu_npc_RW, csa->prob.spu_npc_RW);
+	eieio();
+}
+
+static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned int slot;
+
+	/* Restore, Step 65:
+	 * Restore MFC_RdSPU_MB from the CSA: select channel 29,
+	 * write back its saved count, then write the four saved
+	 * mailbox data words.
+	 */
+	out_be64(&regs->spu_chnlcntptr_RW, 29UL);
+	eieio();
+	out_be64(&regs->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]);
+	for (slot = 0; slot < 4; slot++)
+		out_be64(&regs->spu_chnldata_RW, csa->spu_mailbox_data[slot]);
+	eieio();
+}
+
+static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 66:
+ * When the low byte of the saved MB_Stat is zero (CSA.MB_Stat[P]=0,
+ * mailbox empty), read PPU_MB once and throw the value away.
+ */
+ if (csa->prob.mb_stat_R & 0xFF)
+ return;
+
+ (void)in_be32(&prob->pu_mb_R);
+ eieio();
+}
+
+static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 67:
+ * If CSA.MB_Stat[I]=0 (bits 16-23 clear, mailbox empty), read and
+ * discard PPUINT_MB, then clear the class 2 mailbox interrupt status.
+ */
+ if (csa->prob.mb_stat_R & 0xFF0000)
+ return;
+
+ (void)in_be64(&priv2->puint_mb_R);
+ eieio();
+ spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+ eieio();
+}
+
+static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 69:
+ * Restore the MFC_SR1 register from the copy in csa->priv1.
+ */
+ spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW);
+ eieio();
+}
+
+static inline void set_int_route(struct spu_state *csa, struct spu *spu)
+{
+ /* Hand the CPU recorded in the owning context (last_ran) to the
+ * affinity helper; csa is not consulted. */
+ spu_cpu_affinity_set(spu, spu->ctx->last_ran);
+}
+
+static inline void restore_other_spu_access(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Restore, Step 70:
+ * Restore other SPU mappings to this SPU. TBD: currently a no-op.
+ */
+}
+
+static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 71:
+ * If CSA.SPU_Status[R]=1 (saved status has SPU_STATUS_RUNNING) then
+ * write SPU_RunCntl[R0R1]='01' (SPU_RUNCNTL_RUNNABLE); else nothing.
+ */
+ if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ }
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 72:
+ * Restore the MFC_CNTL register from the copy in csa->priv2.
+ */
+ out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+ eieio();
+
+ /*
+ * The queue is put back into the same state that was evident prior to
+ * the context switch. The suspend flag is added to the saved state in
+ * the csa, if the operational state was suspending or suspended. In
+ * this case, the code that suspended the mfc is responsible for
+ * continuing it. Note that SPE faults do not change the operational
+ * state of the spu.
+ */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 73:
+ * Enable user-space access (if provided) to this
+ * SPU by mapping the virtual pages assigned to
+ * the SPU memory-mapped I/O (MMIO) for problem
+ * state. TBD: nothing is done here yet.
+ */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 74:
+ * Reset the "context switch active" flag.
+ * Not performed by this implementation; the body is intentionally empty.
+ */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 75:
+ * Re-enable SPU interrupts: reload the class 0-2 masks from csa->priv1.
+ */
+ spin_lock_irq(&spu->register_lock); /* all three masks change under the lock */
+ spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+ spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+ spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiece_spu(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Combined steps 2-18 of the SPU context save sequence, which
+ * quiesce the SPU state (disable SPU execution, MFC command
+ * queues, decrementer, SPU interrupts, etc.).
+ *
+ * Returns 0 on success,
+ * 2 if check_spu_isolate() is nonzero at step 2 (nothing done yet),
+ * 6 if it is nonzero at step 6 (steps 3-5 have already run).
+ */
+
+ if (check_spu_isolate(prev, spu)) { /* Step 2. */
+ return 2;
+ }
+ disable_interrupts(prev, spu); /* Step 3. */
+ set_watchdog_timer(prev, spu); /* Step 4. */
+ inhibit_user_access(prev, spu); /* Step 5. */
+ if (check_spu_isolate(prev, spu)) { /* Step 6. */
+ return 6;
+ }
+ set_switch_pending(prev, spu); /* Step 7. */
+ save_mfc_cntl(prev, spu); /* Step 8. */
+ save_spu_runcntl(prev, spu); /* Step 9. */
+ save_mfc_sr1(prev, spu); /* Step 10. */
+ save_spu_status(prev, spu); /* Step 11. */
+ save_mfc_stopped_status(prev, spu); /* Step 12. */
+ halt_mfc_decr(prev, spu); /* Step 13. */
+ save_timebase(prev, spu); /* Step 14. */
+ remove_other_spu_access(prev, spu); /* Step 15. */
+ do_mfc_mssync(prev, spu); /* Step 16. */
+ issue_mfc_tlbie(prev, spu); /* Step 17. */
+ handle_pending_interrupts(prev, spu); /* Step 18. */
+
+ return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Steps 19-45 of the SPU context save sequence (privileged & problem
+ * state areas); step 44 runs early as 26a and none is labelled 29.
+ */
+
+ save_mfc_queues(prev, spu); /* Step 19. */
+ save_ppu_querymask(prev, spu); /* Step 20. */
+ save_ppu_querytype(prev, spu); /* Step 21. */
+ save_ppu_tagstatus(prev, spu); /* NEW. */
+ save_mfc_csr_tsq(prev, spu); /* Step 22. */
+ save_mfc_csr_cmd(prev, spu); /* Step 23. */
+ save_mfc_csr_ato(prev, spu); /* Step 24. */
+ save_mfc_tclass_id(prev, spu); /* Step 25. */
+ set_mfc_tclass_id(prev, spu); /* Step 26. */
+ save_mfc_cmd(prev, spu); /* Step 26a - moved from 44. */
+ purge_mfc_queue(prev, spu); /* Step 27. */
+ wait_purge_complete(prev, spu); /* Step 28. */
+ setup_mfc_sr1(prev, spu); /* Step 30. */
+ save_spu_npc(prev, spu); /* Step 31. */
+ save_spu_privcntl(prev, spu); /* Step 32. */
+ reset_spu_privcntl(prev, spu); /* Step 33. */
+ save_spu_lslr(prev, spu); /* Step 34. */
+ reset_spu_lslr(prev, spu); /* Step 35. */
+ save_spu_cfg(prev, spu); /* Step 36. */
+ save_pm_trace(prev, spu); /* Step 37. */
+ save_mfc_rag(prev, spu); /* Step 38. */
+ save_ppu_mb_stat(prev, spu); /* Step 39. */
+ save_ppu_mb(prev, spu); /* Step 40. */
+ save_ppuint_mb(prev, spu); /* Step 41. */
+ save_ch_part1(prev, spu); /* Step 42. */
+ save_spu_mb(prev, spu); /* Step 43. */
+ reset_ch(prev, spu); /* Step 45. */
+}
+
+static void save_lscsa(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Perform steps 46-57 of the SPU context save sequence, which
+ * save regions of the local store and register file. Step 47
+ * passes spu_save_code and its size to setup_mfc_slbs().
+ */
+
+ resume_mfc_queue(prev, spu); /* Step 46. */
+ /* Step 47. */
+ setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code));
+ set_switch_active(prev, spu); /* Step 48. */
+ enable_interrupts(prev, spu); /* Step 49. */
+ save_ls_16kb(prev, spu); /* Step 50. */
+ set_spu_npc(prev, spu); /* Step 51. */
+ set_signot1(prev, spu); /* Step 52. */
+ set_signot2(prev, spu); /* Step 53. */
+ send_save_code(prev, spu); /* Step 54. */
+ set_ppu_querymask(prev, spu); /* Step 55. */
+ wait_tag_complete(prev, spu); /* Step 56. */
+ wait_spu_stopped(prev, spu); /* Step 57. */
+}
+
+static void force_spu_isolate_exit(struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Stop SPE execution and poll until the status drops RUNNING. */
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ iobarrier_rw();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+ /* Restart SPE master runcntl (SR1 is set to that one bit only). */
+ spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ iobarrier_w();
+
+ /* Initiate isolate exit request and poll until STOPPED_BY_STOP. */
+ out_be64(&priv2->spu_privcntl_RW, 4LL); /* NOTE(review): raw value, no named constant */
+ iobarrier_w();
+ out_be32(&prob->spu_runcntl_RW, 2); /* NOTE(review): raw value, no named constant */
+ iobarrier_rw();
+ POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+ & SPU_STATUS_STOPPED_BY_STOP));
+
+ /* Reset load request to normal. */
+ out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+ iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate - force an isolated exit if the SPU needs one.
+ * @spu: SPU to check; its problem-state status register is read and
+ * nothing is done unless SPU_STATUS_ISOLATED_STATE is set.
+ */
+static void stop_spu_isolate(struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+ /* The SPU is in isolated state; the only way
+ * to get it out is to perform an isolated
+ * exit (clean) operation.
+ */
+ force_spu_isolate_exit(spu);
+ }
+}
+
+static void harvest(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Perform steps 2-25 of the SPU context restore sequence, which
+ * reset an SPU after a failed save or before first use. @prev is
+ * only passed through; spu_restore() calls this with prev == NULL.
+ */
+
+ disable_interrupts(prev, spu); /* Step 2. */
+ inhibit_user_access(prev, spu); /* Step 3. */
+ terminate_spu_app(prev, spu); /* Step 4. */
+ set_switch_pending(prev, spu); /* Step 5. */
+ stop_spu_isolate(spu); /* NEW. */
+ remove_other_spu_access(prev, spu); /* Step 6. */
+ suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */
+ wait_suspend_mfc_complete(prev, spu); /* Step 8. */
+ if (!suspend_spe(prev, spu)) /* Step 9. */
+ clear_spu_status(prev, spu); /* Step 10. */
+ do_mfc_mssync(prev, spu); /* Step 11. */
+ issue_mfc_tlbie(prev, spu); /* Step 12. */
+ handle_pending_interrupts(prev, spu); /* Step 13. */
+ purge_mfc_queue(prev, spu); /* Step 14. */
+ wait_purge_complete(prev, spu); /* Step 15. */
+ reset_spu_privcntl(prev, spu); /* Step 16. */
+ reset_spu_lslr(prev, spu); /* Step 17. */
+ setup_mfc_sr1(prev, spu); /* Step 18. */
+ spu_invalidate_slbs(spu); /* Step 19. */
+ reset_ch_part1(prev, spu); /* Step 20. */
+ reset_ch_part2(prev, spu); /* Step 21. */
+ enable_interrupts(prev, spu); /* Step 22. */
+ set_switch_active(prev, spu); /* Step 23. */
+ set_mfc_tclass_id(prev, spu); /* Step 24. */
+ resume_mfc_queue(prev, spu); /* Step 25. */
+}
+
+static void restore_lscsa(struct spu_state *next, struct spu *spu)
+{
+ /*
+ * Perform steps 26-40 of the SPU context restore sequence, which
+ * restore regions of the local store and register file. Step 30
+ * passes spu_restore_code and its size to setup_mfc_slbs().
+ */
+
+ set_watchdog_timer(next, spu); /* Step 26. */
+ setup_spu_status_part1(next, spu); /* Step 27. */
+ setup_spu_status_part2(next, spu); /* Step 28. */
+ restore_mfc_rag(next, spu); /* Step 29. */
+ /* Step 30. */
+ setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code));
+ set_spu_npc(next, spu); /* Step 31. */
+ set_signot1(next, spu); /* Step 32. */
+ set_signot2(next, spu); /* Step 33. */
+ setup_decr(next, spu); /* Step 34. */
+ setup_ppu_mb(next, spu); /* Step 35. */
+ setup_ppuint_mb(next, spu); /* Step 36. */
+ send_restore_code(next, spu); /* Step 37. */
+ set_ppu_querymask(next, spu); /* Step 38. */
+ wait_tag_complete(next, spu); /* Step 39. */
+ wait_spu_stopped(next, spu); /* Step 40. */
+}
+
+static void restore_csa(struct spu_state *next, struct spu *spu)
+{
+ /*
+ * Combine steps 41-75 of the SPU context restore sequence (privileged
+ * & problem state areas); step 68 is modified, set_int_route() added.
+ */
+
+ restore_spu_privcntl(next, spu); /* Step 41. */
+ restore_status_part1(next, spu); /* Step 42. */
+ restore_status_part2(next, spu); /* Step 43. */
+ restore_ls_16kb(next, spu); /* Step 44. */
+ wait_tag_complete(next, spu); /* Step 45. */
+ suspend_mfc(next, spu); /* Step 46. */
+ wait_suspend_mfc_complete(next, spu); /* Step 47. */
+ issue_mfc_tlbie(next, spu); /* Step 48. */
+ clear_interrupts(next, spu); /* Step 49. */
+ restore_mfc_queues(next, spu); /* Step 50. */
+ restore_ppu_querymask(next, spu); /* Step 51. */
+ restore_ppu_querytype(next, spu); /* Step 52. */
+ restore_mfc_csr_tsq(next, spu); /* Step 53. */
+ restore_mfc_csr_cmd(next, spu); /* Step 54. */
+ restore_mfc_csr_ato(next, spu); /* Step 55. */
+ restore_mfc_tclass_id(next, spu); /* Step 56. */
+ set_llr_event(next, spu); /* Step 57. */
+ restore_decr_wrapped(next, spu); /* Step 58. */
+ restore_ch_part1(next, spu); /* Step 59. */
+ restore_ch_part2(next, spu); /* Step 60. */
+ restore_spu_lslr(next, spu); /* Step 61. */
+ restore_spu_cfg(next, spu); /* Step 62. */
+ restore_pm_trace(next, spu); /* Step 63. */
+ restore_spu_npc(next, spu); /* Step 64. */
+ restore_spu_mb(next, spu); /* Step 65. */
+ check_ppu_mb_stat(next, spu); /* Step 66. */
+ check_ppuint_mb_stat(next, spu); /* Step 67. */
+ spu_invalidate_slbs(spu); /* Modified Step 68. */
+ restore_mfc_sr1(next, spu); /* Step 69. */
+ set_int_route(next, spu); /* NEW */
+ restore_other_spu_access(next, spu); /* Step 70. */
+ restore_spu_runcntl(next, spu); /* Step 71. */
+ restore_mfc_cntl(next, spu); /* Step 72. */
+ enable_user_access(next, spu); /* Step 73. */
+ reset_switch_active(next, spu); /* Step 74. */
+ reenable_interrupts(next, spu); /* Step 75. */
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+ int quiesce_rc;
+
+ /*
+ * A context save runs in three phases:
+ *
+ * (a) quiece_spu() quiesces the SPU,
+ * (b) save_csa() captures the CSA from the PPE side,
+ * (c) save_lscsa() saves the LSCSA, mostly on the SPU itself.
+ *
+ * Returns 0 on success,
+ * the nonzero quiece_spu() code (2 or 6) if quiescing failed,
+ * otherwise whatever check_save_status() reports.
+ */
+
+ /* Phase (a). Any failure here means the SPU is harvested (reset)
+ * and the quiesce code is handed straight back to the caller.
+ */
+ quiesce_rc = quiece_spu(prev, spu);
+ if (quiesce_rc != 0) {
+ harvest(prev, spu);
+ return quiesce_rc;
+ }
+
+ save_csa(prev, spu); /* Phase (b). */
+ save_lscsa(prev, spu); /* Phase (c). */
+
+ /* The result of the SPU-side save decides the return value. */
+ return check_save_status(prev, spu);
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+ int status;
+
+ /*
+ * A context restore has three phases:
+ *
+ * (a) harvest (or reset) of the SPU,
+ * (b) restore of the LSCSA, mostly performed by the SPU,
+ * (c) restore of the CSA, performed by the PPE.
+ *
+ * Only (b) and (c) happen here; harvesting is left to the
+ * caller (see spu_restore()).
+ *
+ * Returns 0 on success, else the check_restore_status() code.
+ */
+
+ restore_lscsa(next, spu); /* Phase (b). */
+
+ /* A nonzero check result means the LSCSA restore failed: give
+ * up immediately, passing that code back, and skip phase (c).
+ */
+ status = check_restore_status(next, spu);
+ if (status != 0)
+ return status;
+
+ restore_csa(next, spu); /* Phase (c). */
+
+ return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Lock, save, unlock. Panics unless rc is 0, 2 or 6; always returns 0.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu); /* Step 1. */
+ rc = __do_spu_save(prev, spu); /* Steps 2-53. */
+ release_spu_lock(spu);
+ if (rc != 0 && rc != 2 && rc != 6) {
+ panic("%s failed on SPU[%d], rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return 0; /* a quiesce failure (rc 2 or 6) is not reported upward */
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Harvest first, since we may not be coming from a successful save
+ * and the hardware state is unknown. Any nonzero restore result
+ * panics, so a normal return yields 0.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu);
+ harvest(NULL, spu); /* no save area is handed to the harvest steps */
+ spu->slb_replace = 0;
+ rc = __do_spu_restore(new, spu);
+ release_spu_lock(spu);
+ if (rc) {
+ panic("%s failed on SPU[%d] rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+ csa->spu_chnlcnt_RW[9] = 1; /* initial counts for channels 9, 21, 23, 28, 30 */
+ csa->spu_chnlcnt_RW[21] = 16;
+ csa->spu_chnlcnt_RW[23] = 1;
+ csa->spu_chnlcnt_RW[28] = 1;
+ csa->spu_chnlcnt_RW[30] = 1;
+ csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP; /* new context starts stopped */
+ csa->prob.mb_stat_R = 0x000400; /* low byte and bits 16-23 zero: both restore-time mailbox checks see "empty" */
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+ /* Enable LS decode, master runcntl, problem state, relocate, bus tlbie. */
+ csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK |
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+ MFC_STATE1_PROBLEM_STATE_MASK |
+ MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK;
+
+ /* Enable OS-specific set of class 0, 1 and 2 interrupts. */
+ csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR |
+ CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+ CLASS0_ENABLE_SPU_ERROR_INTR;
+ csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+ CLASS1_ENABLE_STORAGE_FAULT_INTR;
+ csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR |
+ CLASS2_ENABLE_SPU_HALT_INTR |
+ CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+ csa->priv2.spu_lslr_RW = LS_ADDR_MASK; /* initial SPU_LSLR value (written back in restore step 61) */
+ csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE | /* initial MFC_CNTL value (restore step 72) */
+ MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+ MFC_CNTL_DMA_QUEUES_EMPTY_MASK;
+}
+
+/**
+ * spu_init_csa - initialize an SPU context save area.
+ * @csa: save area to set up.
+ *
+ * Zero the structure, allocate its 'lscsa' and fill in the initial
+ * state (address translation, interrupt masks, etc.) as appropriate
+ * for the given OS environment. Returns 0, -EINVAL for a NULL @csa,
+ * or the error from spu_alloc_lscsa(). The 'lscsa' is allocated
+ * separately as it is by far the largest of the context save regions,
+ * and may need to be pinned or otherwise specially aligned.
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+ int rc;
+
+ if (!csa)
+ return -EINVAL;
+ memset(csa, 0, sizeof(struct spu_state));
+
+ rc = spu_alloc_lscsa(csa);
+ if (rc)
+ return rc;
+
+ spin_lock_init(&csa->register_lock);
+
+ init_prob(csa);
+ init_priv1(csa);
+ init_priv2(csa);
+
+ return 0;
+}
+
+void spu_fini_csa(struct spu_state *csa)
+{
+ spu_free_lscsa(csa); /* presumably undoes spu_alloc_lscsa() from spu_init_csa() -- confirm */
+}
diff --git a/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c b/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 000000000..a87200a53
--- /dev/null
+++ b/kernel/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,88 @@
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * do_spu_run - run code loaded into an SPU (backend for spu_run)
+ * @filp: open file; must have been created by spu_create
+ * @unpc: next program counter for the SPU
+ * @ustatus: status of the SPU
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status stored through @ustatus is the spu_status register value
+ * after the SPU has stopped. Returns the spufs_run_spu() result,
+ * -EINVAL for a file not created by spu_create, or -EFAULT.
+ */
+static long do_spu_run(struct file *filp,
+ __u32 __user *unpc,
+ __u32 __user *ustatus)
+{
+ long ret;
+ struct spufs_inode_info *i;
+ u32 npc, status = 0; /* 0 so an unset status is never copied out */
+
+ ret = -EFAULT;
+ if (get_user(npc, unpc))
+ goto out;
+
+ /* check if this file was created by spu_create */
+ ret = -EINVAL;
+ if (filp->f_op != &spufs_context_fops)
+ goto out;
+
+ i = SPUFS_I(file_inode(filp));
+ ret = spufs_run_spu(i->i_ctx, &npc, &status);
+
+ if (put_user(npc, unpc)) /* written back whatever ret is */
+ ret = -EFAULT;
+
+ if (ustatus && put_user(status, ustatus)) /* @ustatus may be NULL */
+ ret = -EFAULT;
+out:
+ return ret;
+}
+
+static long do_spu_create(const char __user *pathname, unsigned int flags,
+ umode_t mode, struct file *neighbor)
+{
+ struct path parent;
+ struct dentry *dentry;
+ int err;
+
+ /* A failed lookup is reported as-is; nothing needs undoing then. */
+ dentry = user_path_create(AT_FDCWD, pathname, &parent, LOOKUP_DIRECTORY);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ err = spufs_create(&parent, dentry, flags, mode, neighbor);
+ done_path_create(&parent, dentry); /* runs whether or not the create worked */
+ return err;
+}
+
+struct spufs_calls spufs_calls = { /* table of spufs entry points */
+ .create_thread = do_spu_create,
+ .spu_run = do_spu_run,
+ .notify_spus_active = do_notify_spus_active,
+ .owner = THIS_MODULE,
+#ifdef CONFIG_COREDUMP /* coredump hooks only exist with coredump support */
+ .coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+ .coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+#endif /* CONFIG_COREDUMP */
+};