From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/arch/x86/pci/Makefile | 28 + kernel/arch/x86/pci/acpi.c | 536 ++++++++++++++ kernel/arch/x86/pci/amd_bus.c | 421 +++++++++++ kernel/arch/x86/pci/broadcom_bus.c | 116 +++ kernel/arch/x86/pci/bus_numa.c | 154 ++++ kernel/arch/x86/pci/bus_numa.h | 26 + kernel/arch/x86/pci/ce4100.c | 331 +++++++++ kernel/arch/x86/pci/common.c | 700 ++++++++++++++++++ kernel/arch/x86/pci/direct.c | 315 ++++++++ kernel/arch/x86/pci/early.c | 111 +++ kernel/arch/x86/pci/fixup.c | 555 ++++++++++++++ kernel/arch/x86/pci/i386.c | 455 ++++++++++++ kernel/arch/x86/pci/init.c | 44 ++ kernel/arch/x86/pci/intel_mid_pci.c | 326 +++++++++ kernel/arch/x86/pci/irq.c | 1279 +++++++++++++++++++++++++++++++++ kernel/arch/x86/pci/legacy.c | 72 ++ kernel/arch/x86/pci/mmconfig-shared.c | 818 +++++++++++++++++++++ kernel/arch/x86/pci/mmconfig_32.c | 157 ++++ kernel/arch/x86/pci/mmconfig_64.c | 153 ++++ kernel/arch/x86/pci/numachip.c | 129 ++++ kernel/arch/x86/pci/olpc.c | 315 ++++++++ kernel/arch/x86/pci/pcbios.c | 455 ++++++++++++ kernel/arch/x86/pci/sta2x11-fixup.c | 364 ++++++++++ kernel/arch/x86/pci/xen.c | 581 +++++++++++++++ 24 files changed, 8441 insertions(+) create mode 100644 kernel/arch/x86/pci/Makefile create mode 100644 kernel/arch/x86/pci/acpi.c create mode 100644 kernel/arch/x86/pci/amd_bus.c 
create mode 100644 kernel/arch/x86/pci/broadcom_bus.c create mode 100644 kernel/arch/x86/pci/bus_numa.c create mode 100644 kernel/arch/x86/pci/bus_numa.h create mode 100644 kernel/arch/x86/pci/ce4100.c create mode 100644 kernel/arch/x86/pci/common.c create mode 100644 kernel/arch/x86/pci/direct.c create mode 100644 kernel/arch/x86/pci/early.c create mode 100644 kernel/arch/x86/pci/fixup.c create mode 100644 kernel/arch/x86/pci/i386.c create mode 100644 kernel/arch/x86/pci/init.c create mode 100644 kernel/arch/x86/pci/intel_mid_pci.c create mode 100644 kernel/arch/x86/pci/irq.c create mode 100644 kernel/arch/x86/pci/legacy.c create mode 100644 kernel/arch/x86/pci/mmconfig-shared.c create mode 100644 kernel/arch/x86/pci/mmconfig_32.c create mode 100644 kernel/arch/x86/pci/mmconfig_64.c create mode 100644 kernel/arch/x86/pci/numachip.c create mode 100644 kernel/arch/x86/pci/olpc.c create mode 100644 kernel/arch/x86/pci/pcbios.c create mode 100644 kernel/arch/x86/pci/sta2x11-fixup.c create mode 100644 kernel/arch/x86/pci/xen.c (limited to 'kernel/arch/x86/pci') diff --git a/kernel/arch/x86/pci/Makefile b/kernel/arch/x86/pci/Makefile new file mode 100644 index 000000000..5c6fc3577 --- /dev/null +++ b/kernel/arch/x86/pci/Makefile @@ -0,0 +1,28 @@ +obj-y := i386.o init.o + +obj-$(CONFIG_PCI_BIOS) += pcbios.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o +obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_PCI_OLPC) += olpc.o +obj-$(CONFIG_PCI_XEN) += xen.o + +obj-y += fixup.o +obj-$(CONFIG_X86_INTEL_CE) += ce4100.o +obj-$(CONFIG_ACPI) += acpi.o +obj-y += legacy.o irq.o + +obj-$(CONFIG_STA2X11) += sta2x11-fixup.o + +obj-$(CONFIG_X86_NUMACHIP) += numachip.o + +obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o + +obj-y += common.o early.o +obj-y += bus_numa.o + +obj-$(CONFIG_AMD_NB) += amd_bus.o +obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o + +ifeq ($(CONFIG_PCI_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif diff --git a/kernel/arch/x86/pci/acpi.c 
b/kernel/arch/x86/pci/acpi.c new file mode 100644 index 000000000..ff9911707 --- /dev/null +++ b/kernel/arch/x86/pci/acpi.c @@ -0,0 +1,536 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +struct pci_root_info { + struct acpi_device *bridge; + char name[16]; + struct pci_sysdata sd; +#ifdef CONFIG_PCI_MMCONFIG + bool mcfg_added; + u16 segment; + u8 start_bus; + u8 end_bus; +#endif +}; + +static bool pci_use_crs = true; +static bool pci_ignore_seg = false; + +static int __init set_use_crs(const struct dmi_system_id *id) +{ + pci_use_crs = true; + return 0; +} + +static int __init set_nouse_crs(const struct dmi_system_id *id) +{ + pci_use_crs = false; + return 0; +} + +static int __init set_ignore_seg(const struct dmi_system_id *id) +{ + printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident); + pci_ignore_seg = true; + return 0; +} + +static const struct dmi_system_id pci_crs_quirks[] __initconst = { + /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */ + { + .callback = set_use_crs, + .ident = "IBM System x3800", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), + }, + }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ + /* 2006 AMD HT/VIA system with two host bridges */ + { + .callback = set_use_crs, + .ident = "ASRock ALiveSATA2-GLAN", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), + }, + }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */ + /* 2006 AMD HT/VIA system with two host bridges */ + { + .callback = set_use_crs, + .ident = "ASUS M2V-MX SE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"), + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */ + { + .callback = set_use_crs, + .ident = "MSI MS-7253", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), + 
DMI_MATCH(DMI_BOARD_NAME, "MS-7253"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/931368 */ + /* https://bugs.launchpad.net/ubuntu/+source/alsa-driver/+bug/1033299 */ + { + .callback = set_use_crs, + .ident = "Foxconn K8M890-8237A", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Foxconn"), + DMI_MATCH(DMI_BOARD_NAME, "K8M890-8237A"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, + + /* Now for the blacklist.. */ + + /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ + { + .callback = set_nouse_crs, + .ident = "Dell Studio 1557", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1557"), + DMI_MATCH(DMI_BIOS_VERSION, "A09"), + }, + }, + /* https://bugzilla.redhat.com/show_bug.cgi?id=769657 */ + { + .callback = set_nouse_crs, + .ident = "Thinkpad SL510", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_BOARD_NAME, "2847DFG"), + DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), + }, + }, + + /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ + { + .callback = set_ignore_seg, + .ident = "HP xw9300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"), + }, + }, + {} +}; + +void __init pci_acpi_crs_quirks(void) +{ + int year; + + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) { + if (iomem_resource.end <= 0xffffffff) + pci_use_crs = false; + } + + dmi_check_system(pci_crs_quirks); + + /* + * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that + * takes precedence over anything we figured out above. + */ + if (pci_probe & PCI_ROOT_NO_CRS) + pci_use_crs = false; + else if (pci_probe & PCI_USE__CRS) + pci_use_crs = true; + + printk(KERN_INFO "PCI: %s host bridge windows from ACPI; " + "if necessary, use \"pci=%s\" and report a bug\n", + pci_use_crs ? 
"Using" : "Ignoring", + pci_use_crs ? "nocrs" : "use_crs"); +} + +#ifdef CONFIG_PCI_MMCONFIG +static int check_segment(u16 seg, struct device *dev, char *estr) +{ + if (seg) { + dev_err(dev, + "%s can't access PCI configuration " + "space under this host bridge.\n", + estr); + return -EIO; + } + + /* + * Failure in adding MMCFG information is not fatal, + * just can't access extended configuration space of + * devices under this host bridge. + */ + dev_warn(dev, + "%s can't access extended PCI configuration " + "space under this bridge.\n", + estr); + + return 0; +} + +static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, + u8 end, phys_addr_t addr) +{ + int result; + struct device *dev = &info->bridge->dev; + + info->start_bus = start; + info->end_bus = end; + info->mcfg_added = false; + + /* return success if MMCFG is not in use */ + if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) + return 0; + + if (!(pci_probe & PCI_PROBE_MMCONF)) + return check_segment(seg, dev, "MMCONFIG is disabled,"); + + result = pci_mmconfig_insert(dev, seg, start, end, addr); + if (result == 0) { + /* enable MMCFG if it hasn't been enabled yet */ + if (raw_pci_ext_ops == NULL) + raw_pci_ext_ops = &pci_mmcfg; + info->mcfg_added = true; + } else if (result != -EEXIST) + return check_segment(seg, dev, + "fail to add MMCONFIG information,"); + + return 0; +} + +static void teardown_mcfg_map(struct pci_root_info *info) +{ + if (info->mcfg_added) { + pci_mmconfig_delete(info->segment, info->start_bus, + info->end_bus); + info->mcfg_added = false; + } +} +#else +static int setup_mcfg_map(struct pci_root_info *info, + u16 seg, u8 start, u8 end, + phys_addr_t addr) +{ + return 0; +} +static void teardown_mcfg_map(struct pci_root_info *info) +{ +} +#endif + +static void validate_resources(struct device *dev, struct list_head *crs_res, + unsigned long type) +{ + LIST_HEAD(list); + struct resource *res1, *res2, *root = NULL; + struct resource_entry *tmp, *entry, *entry2; + + 
BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); + root = (type & IORESOURCE_MEM) ? &iomem_resource : &ioport_resource; + + list_splice_init(crs_res, &list); + resource_list_for_each_entry_safe(entry, tmp, &list) { + bool free = false; + resource_size_t end; + + res1 = entry->res; + if (!(res1->flags & type)) + goto next; + + /* Exclude non-addressable range or non-addressable portion */ + end = min(res1->end, root->end); + if (end <= res1->start) { + dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", + res1); + free = true; + goto next; + } else if (res1->end != end) { + dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", + res1, (unsigned long long)end + 1, + (unsigned long long)res1->end); + res1->end = end; + } + + resource_list_for_each_entry(entry2, crs_res) { + res2 = entry2->res; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. 
+ */ + if (resource_overlaps(res1, res2)) { + res2->start = min(res1->start, res2->start); + res2->end = max(res1->end, res2->end); + dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", + res2, res1); + free = true; + goto next; + } + } + +next: + resource_list_del(entry); + if (free) + resource_list_free_entry(entry); + else + resource_list_add_tail(entry, crs_res); + } +} + +static void add_resources(struct pci_root_info *info, + struct list_head *resources, + struct list_head *crs_res) +{ + struct resource_entry *entry, *tmp; + struct resource *res, *conflict, *root = NULL; + + validate_resources(&info->bridge->dev, crs_res, IORESOURCE_MEM); + validate_resources(&info->bridge->dev, crs_res, IORESOURCE_IO); + + resource_list_for_each_entry_safe(entry, tmp, crs_res) { + res = entry->res; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + BUG_ON(res); + + conflict = insert_resource_conflict(root, res); + if (conflict) { + dev_info(&info->bridge->dev, + "ignoring host bridge window %pR (conflicts with %s %pR)\n", + res, conflict->name, conflict); + resource_list_destroy_entry(entry); + } + } + + list_splice_tail(crs_res, resources); +} + +static void release_pci_root_info(struct pci_host_bridge *bridge) +{ + struct resource *res; + struct resource_entry *entry; + struct pci_root_info *info = bridge->release_data; + + resource_list_for_each_entry(entry, &bridge->windows) { + res = entry->res; + if (res->parent && + (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) + release_resource(res); + } + + teardown_mcfg_map(info); + kfree(info); +} + +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. 
The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + +static void probe_pci_root_info(struct pci_root_info *info, + struct acpi_device *device, + int busnum, int domain, + struct list_head *list) +{ + int ret; + struct resource_entry *entry, *tmp; + + sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); + info->bridge = device; + ret = acpi_dev_get_resources(device, list, + acpi_dev_filter_resource_type_cb, + (void *)(IORESOURCE_IO | IORESOURCE_MEM)); + if (ret < 0) + dev_warn(&device->dev, + "failed to parse _CRS method, error code %d\n", ret); + else if (ret == 0) + dev_dbg(&device->dev, + "no IO and memory resources present in _CRS\n"); + else + resource_list_for_each_entry_safe(entry, tmp, list) { + if ((entry->res->flags & IORESOURCE_DISABLED) || + resource_is_pcicfg_ioport(entry->res)) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct acpi_device *device = root->device; + struct pci_root_info *info; + int domain = root->segment; + int busnum = root->secondary.start; + struct resource_entry *res_entry; + LIST_HEAD(crs_res); + LIST_HEAD(resources); + struct pci_bus *bus; + struct pci_sysdata *sd; + int node; + + if (pci_ignore_seg) + domain = 0; + + if (domain && !pci_domains_supported) { + printk(KERN_WARNING "pci_bus %04x:%02x: " + 
"ignored (multiple domains not supported)\n", + domain, busnum); + return NULL; + } + + node = acpi_get_node(device->handle); + if (node == NUMA_NO_NODE) { + node = x86_pci_root_bus_node(busnum); + if (node != 0 && node != NUMA_NO_NODE) + dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", + node); + } + + if (node != NUMA_NO_NODE && !node_online(node)) + node = NUMA_NO_NODE; + + info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); + if (!info) { + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); + return NULL; + } + + sd = &info->sd; + sd->domain = domain; + sd->node = node; + sd->companion = device; + + bus = pci_find_bus(domain, busnum); + if (bus) { + /* + * If the desired bus has been scanned already, replace + * its bus->sysdata. + */ + memcpy(bus->sysdata, sd, sizeof(*sd)); + kfree(info); + } else { + /* insert busn res at first */ + pci_add_resource(&resources, &root->secondary); + + /* + * _CRS with no apertures is normal, so only fall back to + * defaults or native bridge info if we're ignoring _CRS. 
+ */ + probe_pci_root_info(info, device, busnum, domain, &crs_res); + if (pci_use_crs) { + add_resources(info, &resources, &crs_res); + } else { + resource_list_for_each_entry(res_entry, &crs_res) + dev_printk(KERN_DEBUG, &device->dev, + "host bridge window %pR (ignored)\n", + res_entry->res); + resource_list_free(&crs_res); + x86_pci_root_bus_resources(busnum, &resources); + } + + if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, + (u8)root->secondary.end, root->mcfg_addr)) + bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, + sd, &resources); + + if (bus) { + pci_scan_child_bus(bus); + pci_set_host_bridge_release( + to_pci_host_bridge(bus->bridge), + release_pci_root_info, info); + } else { + resource_list_free(&resources); + teardown_mcfg_map(info); + kfree(info); + } + } + + /* After the PCI-E bus has been walked and all devices discovered, + * configure any settings of the fabric that might be necessary. + */ + if (bus) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } + + if (bus && node != NUMA_NO_NODE) + dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); + + return bus; +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. 
+ */ + if (!bridge->dev.parent) { + struct pci_sysdata *sd = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, sd->companion); + } + return 0; +} + +int __init pci_acpi_init(void) +{ + struct pci_dev *dev = NULL; + + if (acpi_noirq) + return -ENODEV; + + printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); + pcibios_enable_irq = acpi_pci_irq_enable; + pcibios_disable_irq = acpi_pci_irq_disable; + x86_init.pci.init_irq = x86_init_noop; + + if (pci_routeirq) { + /* + * PCI IRQ routing is set up by pci_enable_device(), but we + * also do it here in case there are still broken drivers that + * don't use pci_enable_device(). + */ + printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + for_each_pci_dev(dev) + acpi_pci_irq_enable(dev); + } + + return 0; +} diff --git a/kernel/arch/x86/pci/amd_bus.c b/kernel/arch/x86/pci/amd_bus.c new file mode 100644 index 000000000..c20d2cc7e --- /dev/null +++ b/kernel/arch/x86/pci/amd_bus.c @@ -0,0 +1,421 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "bus_numa.h" + +#define AMD_NB_F0_NODE_ID 0x60 +#define AMD_NB_F0_UNIT_ID 0x64 +#define AMD_NB_F1_CONFIG_MAP_REG 0xe0 + +#define RANGE_NUM 16 +#define AMD_NB_F1_CONFIG_MAP_RANGES 4 + +struct amd_hostbridge { + u32 bus; + u32 slot; + u32 device; +}; + +/* + * IMPORTANT NOTE: + * hb_probes[] and early_root_info_init() is in maintenance mode. + * It only supports K8, Fam10h, Fam11h, and Fam15h_00h-0fh . + * Future processor will rely on information in ACPI. 
+ */ +static struct amd_hostbridge hb_probes[] __initdata = { + { 0, 0x18, 0x1100 }, /* K8 */ + { 0, 0x18, 0x1200 }, /* Family10h */ + { 0xff, 0, 0x1200 }, /* Family10h */ + { 0, 0x18, 0x1300 }, /* Family11h */ + { 0, 0x18, 0x1600 }, /* Family15h */ +}; + +static struct pci_root_info __init *find_pci_root_info(int node, int link) +{ + struct pci_root_info *info; + + /* find the position */ + list_for_each_entry(info, &pci_root_infos, list) + if (info->node == node && info->link == link) + return info; + + return NULL; +} + +/** + * early_root_info_init() + * called before pcibios_scan_root and pci_scan_bus + * fills the mp_bus_to_cpumask array based according + * to the LDT Bus Number Registers found in the northbridge. + */ +static int __init early_root_info_init(void) +{ + int i; + unsigned bus; + unsigned slot; + int node; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + u64 start; + u64 end; + struct range range[RANGE_NUM]; + u64 val; + u32 address; + bool found; + struct resource fam10h_mmconf_res, *fam10h_mmconf; + u64 fam10h_mmconf_start; + u64 fam10h_mmconf_end; + + if (!early_pci_allowed()) + return -1; + + found = false; + for (i = 0; i < ARRAY_SIZE(hb_probes); i++) { + u32 id; + u16 device; + u16 vendor; + + bus = hb_probes[i].bus; + slot = hb_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + + if (vendor != PCI_VENDOR_ID_AMD) + continue; + + if (hb_probes[i].device == device) { + found = true; + break; + } + } + + if (!found) + return 0; + + /* + * We should learn topology and routing information from _PXM and + * _CRS methods in the ACPI namespace. We extract node numbers + * here to work around BIOSes that don't supply _PXM. 
+ */ + for (i = 0; i < AMD_NB_F1_CONFIG_MAP_RANGES; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, + AMD_NB_F1_CONFIG_MAP_REG + (i << 2)); + + /* Check if that register is enabled for bus range */ + if ((reg & 7) != 3) + continue; + + min_bus = (reg >> 16) & 0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; + link = (reg >> 8) & 0x03; + + info = alloc_pci_root_info(min_bus, max_bus, node, link); + } + + /* + * The following code extracts routing information for use on old + * systems where Linux doesn't automatically use host bridge _CRS + * methods (or when the user specifies "pci=nocrs"). + * + * We only do this through Fam11h, because _CRS should be enough on + * newer systems. + */ + if (boot_cpu_data.x86 > 0x11) + return 0; + + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, AMD_NB_F0_NODE_ID); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, AMD_NB_F0_UNIT_ID); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + add_range(range, RANGE_NUM, 0, 0, 0xffff + 1); + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + info = find_pci_root_info(node, link); + if (!info) + continue; /* not found */ + + printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", + node, link, start, end); + + /* kernel only handle 16 bit only */ + if (end > 0xffff) + end = 0xffff; + update_res(info, start, end, IORESOURCE_IO, 1); + subtract_range(range, RANGE_NUM, start, end + 1); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + info = find_pci_root_info(def_node, def_link); + if (info) { + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + 
update_res(info, range[i].start, range[i].end - 1, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + end = cap_resource((0xfdULL<<32) - 1); + end++; + add_range(range, RANGE_NUM, 0, 0, end); + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + rdmsrl(address, val); + end = (val & 0xffffff800000ULL); + printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20); + if (end < (1ULL<<32)) + subtract_range(range, RANGE_NUM, 0, end); + + /* get mmconfig */ + fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); + /* need to take out mmconf range */ + if (fam10h_mmconf) { + printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); + fam10h_mmconf_start = fam10h_mmconf->start; + fam10h_mmconf_end = fam10h_mmconf->end; + subtract_range(range, RANGE_NUM, fam10h_mmconf_start, + fam10h_mmconf_end + 1); + } else { + fam10h_mmconf_start = 0; + fam10h_mmconf_end = 0; + } + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + info = find_pci_root_info(node, link); + + if (!info) + continue; + + printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", + node, link, start, end); + /* + * some sick allocation would have range overlap with fam10h + * mmconf range, so need to update start and end. 
+ */ + if (fam10h_mmconf_end) { + int changed = 0; + u64 endx = 0; + if (start >= fam10h_mmconf_start && + start <= fam10h_mmconf_end) { + start = fam10h_mmconf_end + 1; + changed = 1; + } + + if (end >= fam10h_mmconf_start && + end <= fam10h_mmconf_end) { + end = fam10h_mmconf_start - 1; + changed = 1; + } + + if (start < fam10h_mmconf_start && + end > fam10h_mmconf_end) { + /* we got a hole */ + endx = fam10h_mmconf_start - 1; + update_res(info, start, endx, IORESOURCE_MEM, 0); + subtract_range(range, RANGE_NUM, start, + endx + 1); + printk(KERN_CONT " ==> [%llx, %llx]", start, endx); + start = fam10h_mmconf_end + 1; + changed = 1; + } + if (changed) { + if (start <= end) { + printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end); + } else { + printk(KERN_CONT "%s\n", endx?"":" ==> none"); + continue; + } + } + } + + update_res(info, cap_resource(start), cap_resource(end), + IORESOURCE_MEM, 1); + subtract_range(range, RANGE_NUM, start, end + 1); + printk(KERN_CONT "\n"); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff800000ULL); + printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20); + subtract_range(range, RANGE_NUM, 1ULL<<32, end); + } + + /* + * add left over mmio range to def node/link ? 
+ * that is tricky, just record range in from start_min to 4G + */ + info = find_pci_root_info(def_node, def_link); + if (info) { + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, cap_resource(range[i].start), + cap_resource(range[i].end - 1), + IORESOURCE_MEM, 1); + } + } + + list_for_each_entry(info, &pci_root_infos, list) { + int busnum; + struct pci_root_res *root_res; + + busnum = info->busn.start; + printk(KERN_DEBUG "bus: %pR on node %x link %x\n", + &info->busn, info->node, info->link); + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_DEBUG "bus: %02x %pR\n", + busnum, &root_res->res); + } + + return 0; +} + +#define ENABLE_CF8_EXT_CFG (1ULL << 46) + +static void enable_pci_io_ecs(void *unused) +{ + u64 reg; + rdmsrl(MSR_AMD64_NB_CFG, reg); + if (!(reg & ENABLE_CF8_EXT_CFG)) { + reg |= ENABLE_CF8_EXT_CFG; + wrmsrl(MSR_AMD64_NB_CFG, reg); + } +} + +static int amd_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block amd_cpu_notifier = { + .notifier_call = amd_cpu_notify, +}; + +static void __init pci_enable_pci_io_ecs(void) +{ +#ifdef CONFIG_AMD_NB + unsigned int i, n; + + for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) { + u8 bus = amd_nb_bus_dev_ranges[i].bus; + u8 slot = amd_nb_bus_dev_ranges[i].dev_base; + u8 limit = amd_nb_bus_dev_ranges[i].dev_limit; + + for (; slot < limit; ++slot) { + u32 val = read_pci_config(bus, slot, 3, 0); + + if (!early_is_amd_nb(val)) + continue; + + val = read_pci_config(bus, slot, 3, 0x8c); + if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) { + val |= ENABLE_CF8_EXT_CFG >> 32; + write_pci_config(bus, slot, 3, 0x8c, val); + } + ++n; + } + } +#endif +} + +static int __init pci_io_ecs_init(void) +{ + 
int cpu; + + /* assume all cpus from fam10h have IO ECS */ + if (boot_cpu_data.x86 < 0x10) + return 0; + + /* Try the PCI method first. */ + if (early_pci_allowed()) + pci_enable_pci_io_ecs(); + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, + (void *)(long)cpu); + __register_cpu_notifier(&amd_cpu_notifier); + cpu_notifier_register_done(); + + pci_probe |= PCI_HAS_IO_ECS; + + return 0; +} + +static int __init amd_postcore_init(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + + early_root_info_init(); + pci_io_ecs_init(); + + return 0; +} + +postcore_initcall(amd_postcore_init); diff --git a/kernel/arch/x86/pci/broadcom_bus.c b/kernel/arch/x86/pci/broadcom_bus.c new file mode 100644 index 000000000..bb461cfd0 --- /dev/null +++ b/kernel/arch/x86/pci/broadcom_bus.c @@ -0,0 +1,116 @@ +/* + * Read address ranges from a Broadcom CNB20LE Host Bridge + * + * Copyright (c) 2010 Ira W. Snyder + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "bus_numa.h" + +static void __init cnb20le_res(u8 bus, u8 slot, u8 func) +{ + struct pci_root_info *info; + struct pci_root_res *root_res; + struct resource res; + u16 word1, word2; + u8 fbus, lbus; + + /* read the PCI bus numbers */ + fbus = read_pci_config_byte(bus, slot, func, 0x44); + lbus = read_pci_config_byte(bus, slot, func, 0x45); + info = alloc_pci_root_info(fbus, lbus, 0, 0); + + /* + * Add the legacy IDE ports on bus 0 + * + * These do not exist anywhere in the bridge registers, AFAICT. I do + * not have the datasheet, so this is the best I can do. 
+ */ + if (fbus == 0) { + update_res(info, 0x01f0, 0x01f7, IORESOURCE_IO, 0); + update_res(info, 0x03f6, 0x03f6, IORESOURCE_IO, 0); + update_res(info, 0x0170, 0x0177, IORESOURCE_IO, 0); + update_res(info, 0x0376, 0x0376, IORESOURCE_IO, 0); + update_res(info, 0xffa0, 0xffaf, IORESOURCE_IO, 0); + } + + /* read the non-prefetchable memory window */ + word1 = read_pci_config_16(bus, slot, func, 0xc0); + word2 = read_pci_config_16(bus, slot, func, 0xc2); + if (word1 != word2) { + res.start = (word1 << 16) | 0x0000; + res.end = (word2 << 16) | 0xffff; + res.flags = IORESOURCE_MEM; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* read the prefetchable memory window */ + word1 = read_pci_config_16(bus, slot, func, 0xc4); + word2 = read_pci_config_16(bus, slot, func, 0xc6); + if (word1 != word2) { + res.start = ((resource_size_t) word1 << 16) | 0x0000; + res.end = ((resource_size_t) word2 << 16) | 0xffff; + res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* read the IO port window */ + word1 = read_pci_config_16(bus, slot, func, 0xd0); + word2 = read_pci_config_16(bus, slot, func, 0xd2); + if (word1 != word2) { + res.start = word1; + res.end = word2; + res.flags = IORESOURCE_IO; + update_res(info, res.start, res.end, res.flags, 0); + } + + /* print information about this host bridge */ + res.start = fbus; + res.end = lbus; + res.flags = IORESOURCE_BUS; + printk(KERN_INFO "CNB20LE PCI Host Bridge (domain 0000 %pR)\n", &res); + + list_for_each_entry(root_res, &info->resources, list) + printk(KERN_INFO "host bridge window %pR\n", &root_res->res); +} + +static int __init broadcom_postcore_init(void) +{ + u8 bus = 0, slot = 0; + u32 id; + u16 vendor, device; + +#ifdef CONFIG_ACPI + /* + * We should get host bridge information from ACPI unless the BIOS + * doesn't support it. 
+ */ + if (acpi_os_get_root_pointer()) + return 0; +#endif + + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + vendor = id & 0xffff; + device = (id >> 16) & 0xffff; + + if (vendor == PCI_VENDOR_ID_SERVERWORKS && + device == PCI_DEVICE_ID_SERVERWORKS_LE) { + cnb20le_res(bus, slot, 0); + cnb20le_res(bus, slot, 1); + } + return 0; +} + +postcore_initcall(broadcom_postcore_init); diff --git a/kernel/arch/x86/pci/bus_numa.c b/kernel/arch/x86/pci/bus_numa.c new file mode 100644 index 000000000..7bcf06a7c --- /dev/null +++ b/kernel/arch/x86/pci/bus_numa.c @@ -0,0 +1,154 @@ +#include +#include +#include + +#include "bus_numa.h" + +LIST_HEAD(pci_root_infos); + +static struct pci_root_info *x86_find_pci_root_info(int bus) +{ + struct pci_root_info *info; + + list_for_each_entry(info, &pci_root_infos, list) + if (info->busn.start == bus) + return info; + + return NULL; +} + +int x86_pci_root_bus_node(int bus) +{ + struct pci_root_info *info = x86_find_pci_root_info(bus); + + if (!info) + return NUMA_NO_NODE; + + return info->node; +} + +void x86_pci_root_bus_resources(int bus, struct list_head *resources) +{ + struct pci_root_info *info = x86_find_pci_root_info(bus); + struct pci_root_res *root_res; + struct resource_entry *window; + bool found = false; + + if (!info) + goto default_resources; + + printk(KERN_DEBUG "PCI: root bus %02x: hardware-probed resources\n", + bus); + + /* already added by acpi ? 
*/ + resource_list_for_each_entry(window, resources) + if (window->res->flags & IORESOURCE_BUS) { + found = true; + break; + } + + if (!found) + pci_add_resource(resources, &info->busn); + + list_for_each_entry(root_res, &info->resources, list) { + struct resource *res; + struct resource *root; + + res = &root_res->res; + pci_add_resource(resources, res); + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } + return; + +default_resources: + /* + * We don't have any host bridge aperture information from the + * "native host bridge drivers," e.g., amd_bus or broadcom_bus, + * so fall back to the defaults historically used by pci_create_bus(). + */ + printk(KERN_DEBUG "PCI: root bus %02x: using default resources\n", bus); + pci_add_resource(resources, &ioport_resource); + pci_add_resource(resources, &iomem_resource); +} + +struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link) +{ + struct pci_root_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return info; + + sprintf(info->name, "PCI Bus #%02x", bus_min); + + INIT_LIST_HEAD(&info->resources); + info->busn.name = info->name; + info->busn.start = bus_min; + info->busn.end = bus_max; + info->busn.flags = IORESOURCE_BUS; + info->node = node; + info->link = link; + + list_add_tail(&info->list, &pci_root_infos); + + return info; +} + +void update_res(struct pci_root_info *info, resource_size_t start, + resource_size_t end, unsigned long flags, int merge) +{ + struct resource *res; + struct pci_root_res *root_res; + + if (start > end) + return; + + if (start == MAX_RESOURCE) + return; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + list_for_each_entry(root_res, &info->resources, list) { + resource_size_t final_start, final_end; + resource_size_t common_start, common_end; + + res = &root_res->res; + if (res->flags != flags) + continue; + + common_start = 
max(res->start, start); + common_end = min(res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(res->start, start); + final_end = max(res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + root_res = kzalloc(sizeof(*root_res), GFP_KERNEL); + if (!root_res) + return; + + res = &root_res->res; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + + list_add_tail(&root_res->list, &info->resources); +} diff --git a/kernel/arch/x86/pci/bus_numa.h b/kernel/arch/x86/pci/bus_numa.h new file mode 100644 index 000000000..ff8f65b04 --- /dev/null +++ b/kernel/arch/x86/pci/bus_numa.h @@ -0,0 +1,26 @@ +#ifndef __BUS_NUMA_H +#define __BUS_NUMA_H +/* + * sub bus (transparent) will use entres from 3 to store extra from + * root, so need to make sure we have enough slot there. + */ +struct pci_root_res { + struct list_head list; + struct resource res; +}; + +struct pci_root_info { + struct list_head list; + char name[12]; + struct list_head resources; + struct resource busn; + int node; + int link; +}; + +extern struct list_head pci_root_infos; +struct pci_root_info *alloc_pci_root_info(int bus_min, int bus_max, + int node, int link); +extern void update_res(struct pci_root_info *info, resource_size_t start, + resource_size_t end, unsigned long flags, int merge); +#endif diff --git a/kernel/arch/x86/pci/ce4100.c b/kernel/arch/x86/pci/ce4100.c new file mode 100644 index 000000000..b914e20b5 --- /dev/null +++ b/kernel/arch/x86/pci/ce4100.c @@ -0,0 +1,331 @@ +/* + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * 2200 Mission College Blvd. + * Santa Clara, CA 97052 + * + * This provides access methods for PCI registers that mis-behave on + * the CE4100. Each register can be assigned a private init, read and + * write routine. The exception to this is the bridge device. The + * bridge device is the only device on bus zero (0) that requires any + * fixup so it is a special case ATM + */ + +#include +#include +#include + +#include +#include + +struct sim_reg { + u32 value; + u32 mask; +}; + +struct sim_dev_reg { + int dev_func; + int reg; + void (*init)(struct sim_dev_reg *reg); + void (*read)(struct sim_dev_reg *reg, u32 *value); + void (*write)(struct sim_dev_reg *reg, u32 value); + struct sim_reg sim_reg; +}; + +struct sim_reg_op { + void (*init)(struct sim_dev_reg *reg); + void (*read)(struct sim_dev_reg *reg, u32 value); + void (*write)(struct sim_dev_reg *reg, u32 value); +}; + +#define MB (1024 * 1024) +#define KB (1024) +#define SIZE_TO_MASK(size) (~(size - 1)) + +#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\ +{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\ + {0, SIZE_TO_MASK(size)} }, + +static void reg_init(struct sim_dev_reg *reg) +{ + pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4, + ®->sim_reg.value); +} + +static void reg_read(struct sim_dev_reg *reg, u32 *value) +{ + unsigned long flags; 
+ + raw_spin_lock_irqsave(&pci_config_lock, flags); + *value = reg->sim_reg.value; + raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static void reg_write(struct sim_dev_reg *reg, u32 value) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + reg->sim_reg.value = (value & reg->sim_reg.mask) | + (reg->sim_reg.value & ~reg->sim_reg.mask); + raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static void sata_reg_init(struct sim_dev_reg *reg) +{ + pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4, + ®->sim_reg.value); + reg->sim_reg.value += 0x400; +} + +static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) +{ + reg_read(reg, value); + if (*value != reg->sim_reg.mask) + *value |= 0x100; +} + +void sata_revid_init(struct sim_dev_reg *reg) +{ + reg->sim_reg.value = 0x01060100; + reg->sim_reg.mask = 0; +} + +static void sata_revid_read(struct sim_dev_reg *reg, u32 *value) +{ + reg_read(reg, value); +} + +static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + /* force interrupt pin value to 0 */ + *value = reg->sim_reg.value & 0xfff00ff; + raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} + +static struct sim_dev_reg bus1_fixups[] = { + DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write) + DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write) + DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) + DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write) + DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write) + DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write) + DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write) + DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, 
reg_read, reg_write) + DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write) + DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write) + DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write) + DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(11, 7, 0x3c, 256, reg_init, reg_noirq_read, reg_write) + DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) + DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write) + DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write) + DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) + DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write) + DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0) + DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write) + DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write) + DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write) + DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write) + DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write) + DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write) + DEFINE_REG(15, 0, 0x10, (64*KB), 
reg_init, reg_read, reg_write) + DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write) + DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write) + DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write) + DEFINE_REG(16, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write) + DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write) + DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write) + DEFINE_REG(18, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write) +}; + +static void __init init_sim_regs(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].init) + bus1_fixups[i].init(&bus1_fixups[i]); + } +} + +static inline void extract_bytes(u32 *value, int reg, int len) +{ + uint32_t mask; + + *value >>= ((reg & 3) * 8); + mask = 0xFFFFFFFF >> ((4 - len) * 8); + *value &= mask; +} + +int bridge_read(unsigned int devfn, int reg, int len, u32 *value) +{ + u32 av_bridge_base, av_bridge_limit; + int retval = 0; + + switch (reg) { + /* Make BARs appear to not request any memory. */ + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_0 + 1: + case PCI_BASE_ADDRESS_0 + 2: + case PCI_BASE_ADDRESS_0 + 3: + *value = 0; + break; + + /* Since subordinate bus number register is hardwired + * to zero and read only, so do the simulation. + */ + case PCI_PRIMARY_BUS: + if (len == 4) + *value = 0x00010100; + break; + + case PCI_SUBORDINATE_BUS: + *value = 1; + break; + + case PCI_MEMORY_BASE: + case PCI_MEMORY_LIMIT: + /* Get the A/V bridge base address. 
*/ + pci_direct_conf1.read(0, 0, devfn, + PCI_BASE_ADDRESS_0, 4, &av_bridge_base); + + av_bridge_limit = av_bridge_base + (512*MB - 1); + av_bridge_limit >>= 16; + av_bridge_limit &= 0xFFF0; + + av_bridge_base >>= 16; + av_bridge_base &= 0xFFF0; + + if (reg == PCI_MEMORY_LIMIT) + *value = av_bridge_limit; + else if (len == 2) + *value = av_bridge_base; + else + *value = (av_bridge_limit << 16) | av_bridge_base; + break; + /* Make prefetchable memory limit smaller than prefetchable + * memory base, so not claim prefetchable memory space. + */ + case PCI_PREF_MEMORY_BASE: + *value = 0xFFF0; + break; + case PCI_PREF_MEMORY_LIMIT: + *value = 0x0; + break; + /* Make IO limit smaller than IO base, so not claim IO space. */ + case PCI_IO_BASE: + *value = 0xF0; + break; + case PCI_IO_LIMIT: + *value = 0; + break; + default: + retval = 1; + } + return retval; +} + +static int ce4100_conf_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + int i; + + WARN_ON(seg); + if (bus == 1) { + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].dev_func == devfn && + bus1_fixups[i].reg == (reg & ~3) && + bus1_fixups[i].read) { + bus1_fixups[i].read(&(bus1_fixups[i]), + value); + extract_bytes(value, reg, len); + return 0; + } + } + } + + if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) && + !bridge_read(devfn, reg, len, value)) + return 0; + + return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); +} + +static int ce4100_conf_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + int i; + + WARN_ON(seg); + if (bus == 1) { + for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { + if (bus1_fixups[i].dev_func == devfn && + bus1_fixups[i].reg == (reg & ~3) && + bus1_fixups[i].write) { + bus1_fixups[i].write(&(bus1_fixups[i]), + value); + return 0; + } + } + } + + /* Discard writes to A/V bridge BAR. 
*/ + if (bus == 0 && PCI_DEVFN(1, 0) == devfn && + ((reg & ~3) == PCI_BASE_ADDRESS_0)) + return 0; + + return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); +} + +static const struct pci_raw_ops ce4100_pci_conf = { + .read = ce4100_conf_read, + .write = ce4100_conf_write, +}; + +int __init ce4100_pci_init(void) +{ + init_sim_regs(); + raw_pci_ops = &ce4100_pci_conf; + /* Indicate caller that it should invoke pci_legacy_init() */ + return 1; +} diff --git a/kernel/arch/x86/pci/common.c b/kernel/arch/x86/pci/common.c new file mode 100644 index 000000000..8fd6f44ae --- /dev/null +++ b/kernel/arch/x86/pci/common.c @@ -0,0 +1,700 @@ +/* + * Low-Level PCI Support for PC + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | + PCI_PROBE_MMCONF; + +unsigned int pci_early_dump_regs; +static int pci_bf_sort; +static int smbios_type_b1_flag; +int pci_routeirq; +int noioapicquirk; +#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS +int noioapicreroute = 0; +#else +int noioapicreroute = 1; +#endif +int pcibios_last_bus = -1; +unsigned long pirq_table_addr; +const struct pci_raw_ops *__read_mostly raw_pci_ops; +const struct pci_raw_ops *__read_mostly raw_pci_ext_ops; + +int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val) +{ + if (domain == 0 && reg < 256 && raw_pci_ops) + return raw_pci_ops->read(domain, bus, devfn, reg, len, val); + if (raw_pci_ext_ops) + return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); + return -EINVAL; +} + +int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val) +{ + if (domain == 0 && reg < 256 && raw_pci_ops) + return raw_pci_ops->write(domain, bus, devfn, reg, len, val); + if (raw_pci_ext_ops) + return 
raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); + return -EINVAL; +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + return raw_pci_read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + return raw_pci_write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +DEFINE_RAW_SPINLOCK(pci_config_lock); + +static int __init can_skip_ioresource_align(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; + printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident); + return 0; +} + +static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __initconst = { +/* + * Systems where PCI IO resource ISA alignment can be skipped + * when the ISA enable bit in the bridge control is not set + */ + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3800", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3850"), + }, + }, + { + .callback = can_skip_ioresource_align, + .ident = "IBM System x3950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "x3950"), + }, + }, + {} +}; + +void __init dmi_check_skip_isa_align(void) +{ + dmi_check_system(can_skip_pciprobe_dmi_table); +} + +static void pcibios_fixup_device_resources(struct pci_dev *dev) +{ + struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; + struct resource *bar_r; + int bar; + + if (pci_probe & PCI_NOASSIGN_BARS) { + /* + * If the BIOS did not assign the BAR, 
zero out the + * resource so the kernel doesn't attmept to assign + * it later on in pci_assign_unassigned_resources + */ + for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) { + bar_r = &dev->resource[bar]; + if (bar_r->start == 0 && bar_r->end != 0) { + bar_r->flags = 0; + bar_r->end = 0; + } + } + } + + if (pci_probe & PCI_NOASSIGN_ROMS) { + if (rom_r->parent) + return; + if (rom_r->start) { + /* we deal with BIOS assigned ROM later */ + return; + } + rom_r->start = rom_r->end = rom_r->flags = 0; + } +} + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void pcibios_fixup_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + + pci_read_bridge_bases(b); + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); +} + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + acpi_pci_remove_bus(bus); +} + +/* + * Only use DMI information to set this if nothing was passed + * on the kernel command line (which was parsed earlier). 
+ */ + +static int __init set_bf_sort(const struct dmi_system_id *d) +{ + if (pci_bf_sort == pci_bf_sort_default) { + pci_bf_sort = pci_dmi_bf; + printk(KERN_INFO "PCI: %s detected, enabling pci=bfsort.\n", d->ident); + } + return 0; +} + +static void __init read_dmi_type_b1(const struct dmi_header *dm, + void *private_data) +{ + u8 *d = (u8 *)dm + 4; + + if (dm->type != 0xB1) + return; + switch (((*(u32 *)d) >> 9) & 0x03) { + case 0x00: + printk(KERN_INFO "dmi type 0xB1 record - unknown flag\n"); + break; + case 0x01: /* set pci=bfsort */ + smbios_type_b1_flag = 1; + break; + case 0x02: /* do not set pci=bfsort */ + smbios_type_b1_flag = 2; + break; + default: + break; + } +} + +static int __init find_sort_method(const struct dmi_system_id *d) +{ + dmi_walk(read_dmi_type_b1, NULL); + + if (smbios_type_b1_flag == 1) { + set_bf_sort(d); + return 0; + } + return -1; +} + +/* + * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) + */ +#ifdef __i386__ +static int __init assign_all_busses(const struct dmi_system_id *d) +{ + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" + " (pci=assign-busses)\n", d->ident); + return 0; +} +#endif + +static int __init set_scan_all(const struct dmi_system_id *d) +{ + printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n", + d->ident); + pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); + return 0; +} + +static const struct dmi_system_id pciprobe_dmi_table[] __initconst = { +#ifdef __i386__ +/* + * Laptops which need pci=assign-busses to see Cardbus cards + */ + { + .callback = assign_all_busses, + .ident = "Samsung X20 Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"), + DMI_MATCH(DMI_PRODUCT_NAME, "SX20S"), + }, + }, +#endif /* __i386__ */ + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 1950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"), + }, + }, + { + .callback = 
set_bf_sort, + .ident = "Dell PowerEdge 1955", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge R900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R900"), + }, + }, + { + .callback = find_sort_method, + .ident = "Dell System", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL20p G3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G3"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL20p G4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G4"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL30p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL30p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL25p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL25p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL35p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL35p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL45p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL45p G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + 
DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G2"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL460c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL460c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL465c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL465c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL480c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL480c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL685c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL380", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"), + }, + }, +#ifdef __i386__ + { + .callback = assign_all_busses, + .ident = "Compaq EVO N800c", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "EVO N800c"), + }, + }, +#endif + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL385 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL585 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Stratus"), + DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + 
DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R32"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R31"), + }, + }, + {} +}; + +void __init dmi_check_pciprobe(void) +{ + dmi_check_system(pciprobe_dmi_table); +} + +void pcibios_scan_root(int busnum) +{ + struct pci_bus *bus; + struct pci_sysdata *sd; + LIST_HEAD(resources); + + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum); + return; + } + sd->node = x86_pci_root_bus_node(busnum); + x86_pci_root_bus_resources(busnum, &resources); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); + if (!bus) { + pci_free_resource_list(&resources); + kfree(sd); + return; + } + pci_bus_add_devices(bus); +} + +void __init pcibios_set_cache_line_size(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + /* + * Set PCI cacheline size to that of the CPU if the CPU has reported it. + * (For older CPUs that don't support cpuid, we se it to 32 bytes + * It's also good for 386/486s (which actually have 16) + * as quite a few PCI devices do not support smaller values. + */ + if (c->x86_clflush_size > 0) { + pci_dfl_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", + pci_dfl_cache_line_size << 2); + } else { + pci_dfl_cache_line_size = 32 >> 2; + printk(KERN_DEBUG "PCI: Unknown cacheline size. 
Setting to 32 bytes\n"); + } +} + +int __init pcibios_init(void) +{ + if (!raw_pci_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return 0; + } + + pcibios_set_cache_line_size(); + pcibios_resource_survey(); + + if (pci_bf_sort >= pci_force_bf) + pci_sort_breadthfirst(); + return 0; +} + +char *__init pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } else if (!strcmp(str, "bfsort")) { + pci_bf_sort = pci_force_bf; + return NULL; + } else if (!strcmp(str, "nobfsort")) { + pci_bf_sort = pci_force_nobf; + return NULL; + } +#ifdef CONFIG_PCI_BIOS + else if (!strcmp(str, "bios")) { + pci_probe = PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nobios")) { + pci_probe &= ~PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "biosirq")) { + pci_probe |= PCI_BIOS_IRQ_SCAN; + return NULL; + } else if (!strncmp(str, "pirqaddr=", 9)) { + pirq_table_addr = simple_strtoul(str+9, NULL, 0); + return NULL; + } +#endif +#ifdef CONFIG_PCI_DIRECT + else if (!strcmp(str, "conf1")) { + pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; + return NULL; + } + else if (!strcmp(str, "conf2")) { + pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; + return NULL; + } +#endif +#ifdef CONFIG_PCI_MMCONFIG + else if (!strcmp(str, "nommconf")) { + pci_probe &= ~PCI_PROBE_MMCONF; + return NULL; + } + else if (!strcmp(str, "check_enable_amd_mmconf")) { + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return NULL; + } +#endif + else if (!strcmp(str, "noacpi")) { + acpi_noirq_set(); + return NULL; + } + else if (!strcmp(str, "noearly")) { + pci_probe |= PCI_PROBE_NOEARLY; + return NULL; + } + else if (!strcmp(str, "usepirqmask")) { + pci_probe |= PCI_USE_PIRQ_MASK; + return NULL; + } else if (!strncmp(str, "irqmask=", 8)) { + pcibios_irq_mask = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strncmp(str, "lastbus=", 8)) { + pcibios_last_bus = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strcmp(str, "rom")) { + 
pci_probe |= PCI_ASSIGN_ROMS; + return NULL; + } else if (!strcmp(str, "norom")) { + pci_probe |= PCI_NOASSIGN_ROMS; + return NULL; + } else if (!strcmp(str, "nobar")) { + pci_probe |= PCI_NOASSIGN_BARS; + return NULL; + } else if (!strcmp(str, "assign-busses")) { + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + return NULL; + } else if (!strcmp(str, "use_crs")) { + pci_probe |= PCI_USE__CRS; + return NULL; + } else if (!strcmp(str, "nocrs")) { + pci_probe |= PCI_ROOT_NO_CRS; + return NULL; + } else if (!strcmp(str, "earlydump")) { + pci_early_dump_regs = 1; + return NULL; + } else if (!strcmp(str, "routeirq")) { + pci_routeirq = 1; + return NULL; + } else if (!strcmp(str, "skip_isa_align")) { + pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; + return NULL; + } else if (!strcmp(str, "noioapicquirk")) { + noioapicquirk = 1; + return NULL; + } else if (!strcmp(str, "ioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 0; + return NULL; + } else if (!strcmp(str, "noioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 1; + return NULL; + } + return str; +} + +unsigned int pcibios_assign_all_busses(void) +{ + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 
1 : 0; +} + +int pcibios_add_device(struct pci_dev *dev) +{ + struct setup_data *data; + struct pci_setup_rom *rom; + u64 pa_data; + + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = ioremap(pa_data, sizeof(*rom)); + if (!data) + return -ENOMEM; + + if (data->type == SETUP_PCI) { + rom = (struct pci_setup_rom *)data; + + if ((pci_domain_nr(dev->bus) == rom->segment) && + (dev->bus->number == rom->bus) && + (PCI_SLOT(dev->devfn) == rom->device) && + (PCI_FUNC(dev->devfn) == rom->function) && + (dev->vendor == rom->vendor) && + (dev->device == rom->devid)) { + dev->rom = pa_data + + offsetof(struct pci_setup_rom, romdata); + dev->romlen = rom->pcilen; + } + } + pa_data = data->next; + iounmap(data); + } + return 0; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + int err; + + if ((err = pci_enable_resources(dev, mask)) < 0) + return err; + + if (!pci_dev_msi_enabled(dev)) + return pcibios_enable_irq(dev); + return 0; +} + +void pcibios_disable_device (struct pci_dev *dev) +{ + if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) + pcibios_disable_irq(dev); +} + +int pci_ext_cfg_avail(void) +{ + if (raw_pci_ext_ops) + return 1; + else + return 0; +} diff --git a/kernel/arch/x86/pci/direct.c b/kernel/arch/x86/pci/direct.c new file mode 100644 index 000000000..15460590b --- /dev/null +++ b/kernel/arch/x86/pci/direct.c @@ -0,0 +1,315 @@ +/* + * direct.c - Low-level direct PCI config space access + */ + +#include +#include +#include +#include + +/* + * Functions for accessing PCI base (first 256 bytes) and extended + * (4096 bytes per PCI function) configuration space with type 1 + * accesses. 
+ */ + +#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ + (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ + | (devfn << 8) | (reg & 0xFC)) + +static int pci_conf1_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (seg || (bus > 255) || (devfn > 255) || (reg > 4095)) { + *value = -1; + return -EINVAL; + } + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); + + switch (len) { + case 1: + *value = inb(0xCFC + (reg & 3)); + break; + case 2: + *value = inw(0xCFC + (reg & 2)); + break; + case 4: + *value = inl(0xCFC); + break; + } + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + + if (seg || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); + + switch (len) { + case 1: + outb((u8)value, 0xCFC + (reg & 3)); + break; + case 2: + outw((u16)value, 0xCFC + (reg & 2)); + break; + case 4: + outl((u32)value, 0xCFC); + break; + } + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_ADDRESS + +const struct pci_raw_ops pci_direct_conf1 = { + .read = pci_conf1_read, + .write = pci_conf1_write, +}; + + +/* + * Functions for accessing PCI configuration space with type 2 accesses + */ + +#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) + +static int pci_conf2_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + int dev, fn; + + WARN_ON(seg); + if ((bus > 255) || (devfn > 255) || (reg > 255)) { + *value = -1; + return -EINVAL; + } + + dev = PCI_SLOT(devfn); + fn = PCI_FUNC(devfn); + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + 
raw_spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + *value = inb(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + *value = inw(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + *value = inl(PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb(0, 0xCF8); + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf2_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + int dev, fn; + + WARN_ON(seg); + if ((bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + dev = PCI_SLOT(devfn); + fn = PCI_FUNC(devfn); + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + outb((u8)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + outw((u16)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + outl((u32)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb(0, 0xCF8); + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF2_ADDRESS + +static const struct pci_raw_ops pci_direct_conf2 = { + .read = pci_conf2_read, + .write = pci_conf2_write, +}; + + +/* + * Before we decide to use direct hardware access mechanisms, we try to do some + * trivial checks to ensure it at least _seems_ to be working -- we just test + * whether bus 00 contains a host bridge (this is similar to checking + * techniques used in XFree86, but ours should be more reliable since we + * attempt to make use of direct access hints provided by the PCI BIOS). + * + * This should be close to trivial, but it isn't, because there are buggy + * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. 
+ */ +static int __init pci_sanity_check(const struct pci_raw_ops *o) +{ + u32 x = 0; + int year, devfn; + + if (pci_probe & PCI_NO_CHECKS) + return 1; + /* Assume Type 1 works for newer systems. + This handles machines that don't have anything on PCI Bus 0. */ + dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); + if (year >= 2001) + return 1; + + for (devfn = 0; devfn < 0x100; devfn++) { + if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x)) + continue; + if (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA) + return 1; + + if (o->read(0, 0, devfn, PCI_VENDOR_ID, 2, &x)) + continue; + if (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ) + return 1; + } + + DBG(KERN_WARNING "PCI: Sanity check failed\n"); + return 0; +} + +static int __init pci_check_type1(void) +{ + unsigned long flags; + unsigned int tmp; + int works = 0; + + local_irq_save(flags); + + outb(0x01, 0xCFB); + tmp = inl(0xCF8); + outl(0x80000000, 0xCF8); + if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { + works = 1; + } + outl(tmp, 0xCF8); + local_irq_restore(flags); + + return works; +} + +static int __init pci_check_type2(void) +{ + unsigned long flags; + int works = 0; + + local_irq_save(flags); + + outb(0x00, 0xCFB); + outb(0x00, 0xCF8); + outb(0x00, 0xCFA); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && + pci_sanity_check(&pci_direct_conf2)) { + works = 1; + } + + local_irq_restore(flags); + + return works; +} + +void __init pci_direct_init(int type) +{ + if (type == 0) + return; + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); + if (type == 1) { + raw_pci_ops = &pci_direct_conf1; + if (raw_pci_ext_ops) + return; + if (!(pci_probe & PCI_HAS_IO_ECS)) + return; + printk(KERN_INFO "PCI: Using configuration type 1 " + "for extended access\n"); + raw_pci_ext_ops = &pci_direct_conf1; + return; + } + raw_pci_ops = &pci_direct_conf2; +} + +int __init pci_direct_probe(void) +{ + if ((pci_probe & PCI_PROBE_CONF1) == 0) + goto type2; + if 
(!request_region(0xCF8, 8, "PCI conf1")) + goto type2; + + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; + port_cf9_safe = true; + return 1; + } + release_region(0xCF8, 8); + + type2: + if ((pci_probe & PCI_PROBE_CONF2) == 0) + return 0; + if (!request_region(0xCF8, 4, "PCI conf2")) + return 0; + if (!request_region(0xC000, 0x1000, "PCI conf2")) + goto fail2; + + if (pci_check_type2()) { + raw_pci_ops = &pci_direct_conf2; + port_cf9_safe = true; + return 2; + } + + release_region(0xC000, 0x1000); + fail2: + release_region(0xCF8, 4); + return 0; +} diff --git a/kernel/arch/x86/pci/early.c b/kernel/arch/x86/pci/early.c new file mode 100644 index 000000000..d1067d539 --- /dev/null +++ b/kernel/arch/x86/pci/early.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + return v; +} + +u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) +{ + u8 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inb(0xcfc + (offset&3)); + return v; +} + +u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) +{ + u16 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inw(0xcfc + (offset&2)); + return v; +} + +void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, + u32 val) +{ + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outl(val, 0xcfc); +} + +void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) +{ + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outb(val, 0xcfc + (offset&3)); +} + +void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) +{ + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outw(val, 
0xcfc + (offset&2)); +} + +int early_pci_allowed(void) +{ + return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == + PCI_PROBE_CONF1; +} + +void early_dump_pci_device(u8 bus, u8 slot, u8 func) +{ + int i; + int j; + u32 val; + + printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:", + bus, slot, func); + + for (i = 0; i < 256; i += 4) { + if (!(i & 0x0f)) + printk("\n %02x:",i); + + val = read_pci_config(bus, slot, func, i); + for (j = 0; j < 4; j++) { + printk(" %02x", val & 0xff); + val >>= 8; + } + } + printk("\n"); +} + +void early_dump_pci_devices(void) +{ + unsigned bus, slot, func; + + if (!early_pci_allowed()) + return; + + for (bus = 0; bus < 256; bus++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u8 type; + + class = read_pci_config(bus, slot, func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + continue; + + early_dump_pci_device(bus, slot, func); + + if (func == 0) { + type = read_pci_config_byte(bus, slot, + func, + PCI_HEADER_TYPE); + if (!(type & 0x80)) + break; + } + } + } + } +} diff --git a/kernel/arch/x86/pci/fixup.c b/kernel/arch/x86/pci/fixup.c new file mode 100644 index 000000000..9a2b7101a --- /dev/null +++ b/kernel/arch/x86/pci/fixup.c @@ -0,0 +1,555 @@ +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + */ + +#include +#include +#include +#include +#include +#include + +static void pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. 
+ */ + int pxb, reg; + u8 busno, suba, subb; + + dev_warn(&d->dev, "Searching for i450NX host bridges\n"); + reg = 0xd0; + for(pxb = 0; pxb < 2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, + suba, subb); + if (busno) + pcibios_scan_root(busno); /* Bus A */ + if (suba < subb) + pcibios_scan_root(suba+1); /* Bus B */ + } + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); + +static void pci_fixup_i450gx(struct pci_dev *d) +{ + /* + * i450GX and i450KX -- Find and scan all secondary buses. + * (called separately for each PCI bridge found) + */ + u8 busno; + pci_read_config_byte(d, 0x4a, &busno); + dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno); + pcibios_scan_root(busno); + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); + +static void pci_fixup_umc_ide(struct pci_dev *d) +{ + /* + * UM8886BF IDE controller sets region type bits incorrectly, + * therefore they look like memory despite of them being I/O. + */ + int i; + + dev_warn(&d->dev, "Fixing base address flags\n"); + for(i = 0; i < 4; i++) + d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide); + +static void pci_fixup_ncr53c810(struct pci_dev *d) +{ + /* + * NCR 53C810 returns class code 0 (at least on some systems). 
+ * Fix class to be PCI_CLASS_STORAGE_SCSI + */ + if (!d->class) { + dev_warn(&d->dev, "Fixing NCR 53C810 class code\n"); + d->class = PCI_CLASS_STORAGE_SCSI << 8; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810); + +static void pci_fixup_latency(struct pci_dev *d) +{ + /* + * SiS 5597 and 5598 chipsets require latency timer set to + * at most 32 to avoid lockups. + */ + dev_dbg(&d->dev, "Setting max latency to 32\n"); + pcibios_max_latency = 32; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency); + +static void pci_fixup_piix4_acpi(struct pci_dev *d) +{ + /* + * PIIX4 ACPI device: hardwired IRQ9 + */ + d->irq = 9; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi); + +/* + * Addresses issues with problems in the memory write queue timer in + * certain VIA Northbridges. This bugfix is per VIA's specifications, + * except for the KL133/KM133: clearing bit 5 on those Northbridges seems + * to trigger a bug in its integrated ProSavage video card, which + * causes screen corruption. We only clear bits 6 and 7 for that chipset, + * until VIA can provide us with definitive information on why screen + * corruption occurs, and what exactly those bits do. 
+ * + * VIA 8363,8622,8361 Northbridges: + * - bits 5, 6, 7 at offset 0x55 need to be turned off + * VIA 8367 (KT266x) Northbridges: + * - bits 5, 6, 7 at offset 0x95 need to be turned off + * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges: + * - bits 6, 7 at offset 0x55 need to be turned off + */ + +#define VIA_8363_KL133_REVISION_ID 0x81 +#define VIA_8363_KM133_REVISION_ID 0x84 + +static void pci_fixup_via_northbridge_bug(struct pci_dev *d) +{ + u8 v; + int where = 0x55; + int mask = 0x1f; /* clear bits 5, 6, 7 by default */ + + if (d->device == PCI_DEVICE_ID_VIA_8367_0) { + /* fix pci bus latency issues resulted by NB bios error + it appears on bug free^Wreduced kt266x's bios forces + NB latency to zero */ + pci_write_config_byte(d, PCI_LATENCY_TIMER, 0); + + where = 0x95; /* the memory write queue timer register is + different for the KT266x's: 0x95 not 0x55 */ + } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && + (d->revision == VIA_8363_KL133_REVISION_ID || + d->revision == VIA_8363_KM133_REVISION_ID)) { + mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 + causes screen corruption on the KL133/KM133 */ + } + + pci_read_config_byte(d, where, &v); + if (v & ~mask) { + dev_warn(&d->dev, "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ + d->device, d->revision, where, v, mask, v & mask); + v &= mask; + pci_write_config_byte(d, where, v); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, 
PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); + +/* + * For some reasons Intel decided that certain parts of their + * 815, 845 and some other chipsets must look like PCI-to-PCI bridges + * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, + * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according + * to Intel terminology. These devices do forward all addresses from + * system to PCI bus no matter what are their window settings, so they are + * "transparent" (or subtractive decoding) from programmers point of view. + */ +static void pci_fixup_transparent_bridge(struct pci_dev *dev) +{ + if ((dev->device & 0xff00) == 0x2400) + dev->transparent = 1; +} +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge); + +/* + * Fixup for C1 Halt Disconnect problem on nForce2 systems. + * + * From information provided by "Allen Martin" : + * + * A hang is caused when the CPU generates a very fast CONNECT/HALT cycle + * sequence. Workaround is to set the SYSTEM_IDLE_TIMEOUT to 80 ns. + * This allows the state-machine and timer to return to a proper state within + * 80 ns of the CONNECT and probe appearing together. Since the CPU will not + * issue another HALT within 80 ns of the initial HALT, the failure condition + * is avoided. + */ +static void pci_fixup_nforce2(struct pci_dev *dev) +{ + u32 val; + + /* + * Chip Old value New value + * C17 0x1F0FFF01 0x1F01FF01 + * C18D 0x9F0FFF01 0x9F01FF01 + * + * Northbridge chip version may be determined by + * reading the PCI revision ID (0xC1 or greater is C18D). 
+ */ + pci_read_config_dword(dev, 0x6c, &val); + + /* + * Apply fixup if needed, but don't touch disconnect state + */ + if ((val & 0x00FF0000) != 0x00010000) { + dev_warn(&dev->dev, "nForce2 C1 Halt Disconnect fixup\n"); + pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); + +/* Max PCI Express root ports */ +#define MAX_PCIEROOT 6 +static int quirk_aspm_offset[MAX_PCIEROOT << 3]; + +#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7)) + +static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + return raw_pci_read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +/* + * Replace the original pci bus ops for write with a new one that will filter + * the request to insure ASPM cannot be enabled. + */ +static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + u8 offset; + + offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)]; + + if ((offset) && (where == offset)) + value = value & ~PCI_EXP_LNKCTL_ASPMC; + + return raw_pci_write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static struct pci_ops quirk_pcie_aspm_ops = { + .read = quirk_pcie_aspm_read, + .write = quirk_pcie_aspm_write, +}; + +/* + * Prevents PCI Express ASPM (Active State Power Management) being enabled. + * + * Save the register offset, where the ASPM control bits are located, + * for each PCI Express device that is in the device list of + * the root port in an array for fast indexing. Replace the bus ops + * with the modified one. 
+ */ +static void pcie_rootport_aspm_quirk(struct pci_dev *pdev) +{ + int i; + struct pci_bus *pbus; + struct pci_dev *dev; + + if ((pbus = pdev->subordinate) == NULL) + return; + + /* + * Check if the DID of pdev matches one of the six root ports. This + * check is needed in the case this function is called directly by the + * hot-plug driver. + */ + if ((pdev->device < PCI_DEVICE_ID_INTEL_MCH_PA) || + (pdev->device > PCI_DEVICE_ID_INTEL_MCH_PC1)) + return; + + if (list_empty(&pbus->devices)) { + /* + * If no device is attached to the root port at power-up or + * after hot-remove, the pbus->devices is empty and this code + * will set the offsets to zero and the bus ops to parent's bus + * ops, which is unmodified. + */ + for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i) + quirk_aspm_offset[i] = 0; + + pci_bus_set_ops(pbus, pbus->parent->ops); + } else { + /* + * If devices are attached to the root port at power-up or + * after hot-add, the code loops through the device list of + * each root port to save the register offsets and replace the + * bus ops. 
+ */ + list_for_each_entry(dev, &pbus->devices, bus_list) + /* There are 0 to 8 devices attached to this bus */ + quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = + dev->pcie_cap + PCI_EXP_LNKCTL; + + pci_bus_set_ops(pbus, &quirk_pcie_aspm_ops); + dev_info(&pbus->dev, "writes to ASPM control bits will be ignored\n"); + } + +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk); + +/* + * Fixup to mark boot BIOS video selected by BIOS before it changes + * + * From information provided by "Jon Smirl" + * + * The standard boot ROM sequence for an x86 machine uses the BIOS + * to select an initial video card for boot display. This boot video + * card will have it's BIOS copied to C0000 in system RAM. + * IORESOURCE_ROM_SHADOW is used to associate the boot video + * card with this copy. On laptops this copy has to be used since + * the main ROM may be compressed or combined with another image. + * See pci_map_rom() for use of this flag. Before marking the device + * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set + * by either arch cde or vga-arbitration, if so only apply the fixup to this + * already determined primary video card. + */ + +static void pci_fixup_video(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + u16 config; + + /* Is VGA routed to us? 
*/ + bus = pdev->bus; + while (bus) { + bridge = bus->self; + + /* + * From information provided by + * "David Miller" + * The bridge control register is valid for PCI header + * type BRIDGE, or CARDBUS. Host to PCI controllers use + * PCI header type NORMAL. + */ + if (bridge && (pci_is_bridge(bridge))) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, + &config); + if (!(config & PCI_BRIDGE_CTL_VGA)) + return; + } + bus = bus->parent; + } + if (!vga_default_device() || pdev == vga_default_device()) { + pci_read_config_word(pdev, PCI_COMMAND, &config); + if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; + dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n"); + } + } +} +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); + + +static const struct dmi_system_id msi_k8t_dmi_table[] = { + { + .ident = "MSI-K8T-Neo2Fir", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MSI"), + DMI_MATCH(DMI_PRODUCT_NAME, "MS-6702E"), + }, + }, + {} +}; + +/* + * The AMD-Athlon64 board MSI "K8T Neo2-FIR" disables the onboard sound + * card if a PCI-soundcard is added. + * + * The BIOS only gives options "DISABLED" and "AUTO". This code sets + * the corresponding register-value to enable the soundcard. + * + * The soundcard is only enabled, if the mainborad is identified + * via DMI-tables and the soundcard is detected to be off. 
+ */ +static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev) +{ + unsigned char val; + if (!dmi_check_system(msi_k8t_dmi_table)) + return; /* only applies to MSI K8T Neo2-FIR */ + + pci_read_config_byte(dev, 0x50, &val); + if (val & 0x40) { + pci_write_config_byte(dev, 0x50, val & (~0x40)); + + /* verify the change for status output */ + pci_read_config_byte(dev, 0x50, &val); + if (val & 0x40) + dev_info(&dev->dev, "Detected MSI K8T Neo2-FIR; " + "can't enable onboard soundcard!\n"); + else + dev_info(&dev->dev, "Detected MSI K8T Neo2-FIR; " + "enabled onboard soundcard\n"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + pci_fixup_msi_k8t_onboard_sound); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + pci_fixup_msi_k8t_onboard_sound); + +/* + * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. + * + * We pretend to bring them out of full D3 state, and restore the proper + * IRQ, PCI cache line size, and BARs, otherwise the device won't function + * properly. In some cases, the device will generate an interrupt on + * the wrong IRQ line, causing any devices sharing the line it's + * *supposed* to use to be disabled by the kernel's IRQ debug code. 
+ */ +static u16 toshiba_line_size; + +static const struct dmi_system_id toshiba_ohci1394_dmi_table[] = { + { + .ident = "Toshiba PS5 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PS5"), + }, + }, + { + .ident = "Toshiba PSM4 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), + }, + }, + { + .ident = "Toshiba A40 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), + }, + }, + { } +}; + +static void pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + dev->current_state = PCI_D3cold; + pci_read_config_word(dev, PCI_CACHE_LINE_SIZE, &toshiba_line_size); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, + pci_pre_fixup_toshiba_ohci1394); + +static void pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + /* Restore config space on Toshiba laptops */ + pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, (u8 *)&dev->irq); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, + pci_resource_start(dev, 0)); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, + pci_resource_start(dev, 1)); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, + pci_post_fixup_toshiba_ohci1394); + + +/* + * Prevent the BIOS trapping accesses to the Cyrix CS5530A video device + * configuration space. 
+ */ +static void pci_early_fixup_cyrix_5530(struct pci_dev *dev) +{ + u8 r; + /* clear 'F4 Video Configuration Trap' bit */ + pci_read_config_byte(dev, 0x42, &r); + r &= 0xfd; + pci_write_config_byte(dev, 0x42, r); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); + +/* + * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller: + * prevent update of the BAR0, which doesn't look like a normal BAR. + */ +static void pci_siemens_interrupt_controller(struct pci_dev *dev) +{ + dev->resource[0].flags |= IORESOURCE_PCI_FIXED; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, + pci_siemens_interrupt_controller); + +/* + * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from + * confusing the PCI engine: + */ +static void sb600_disable_hpet_bar(struct pci_dev *dev) +{ + u8 val; + + /* + * The SB600 and SB700 both share the same device + * ID, but the PM register 0x55 does something different + * for the SB700, so make sure we are dealing with the + * SB600 before touching the bit: + */ + + pci_read_config_byte(dev, 0x08, &val); + + if (val < 0x2F) { + outb(0x55, 0xCD6); + val = inb(0xCD7); + + /* Set bit 7 in PM register 0x55 */ + outb(0x55, 0xCD6); + outb(val | 0x80, 0xCD7); + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); + +#ifdef CONFIG_HPET_TIMER +static void sb600_hpet_quirk(struct pci_dev *dev) +{ + struct resource *r = &dev->resource[1]; + + if (r->flags & IORESOURCE_MEM && r->start == hpet_address) { + r->flags |= IORESOURCE_PCI_FIXED; + dev_info(&dev->dev, "reg 0x14 contains HPET; making it immovable\n"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, 0x4385, sb600_hpet_quirk); +#endif + +/* + * Twinhead H12Y needs us to block out a region otherwise we map devices + * there and any access kills the box. 
+ * + * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231 + * + * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) + */ +static void twinhead_reserve_killing_zone(struct pci_dev *dev) +{ + if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { + pr_info("Reserving memory on Twinhead H12Y\n"); + request_mem_region(0xFFB00000, 0x100000, "twinhead"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); diff --git a/kernel/arch/x86/pci/i386.c b/kernel/arch/x86/pci/i386.c new file mode 100644 index 000000000..349c0d32c --- /dev/null +++ b/kernel/arch/x86/pci/i386.c @@ -0,0 +1,455 @@ +/* + * Low-Level PCI Access for i386 machines + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * Drew's work was sponsored by: + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1997--2000 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +/* + * This list of dynamic mappings is for temporarily maintaining + * original BIOS BAR addresses for possible reinstatement. + */ +struct pcibios_fwaddrmap { + struct list_head list; + struct pci_dev *dev; + resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; +}; + +static LIST_HEAD(pcibios_fwaddrmappings); +static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); +static bool pcibios_fw_addr_done; + +/* Must be called with 'pcibios_fwaddrmap_lock' lock held. 
*/ +static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) +{ + struct pcibios_fwaddrmap *map; + + WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); + + list_for_each_entry(map, &pcibios_fwaddrmappings, list) + if (map->dev == dev) + return map; + + return NULL; +} + +static void +pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr) +{ + unsigned long flags; + struct pcibios_fwaddrmap *map; + + if (pcibios_fw_addr_done) + return; + + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); + map = pcibios_fwaddrmap_lookup(dev); + if (!map) { + spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return; + + map->dev = pci_dev_get(dev); + map->fw_addr[idx] = fw_addr; + INIT_LIST_HEAD(&map->list); + + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); + list_add_tail(&map->list, &pcibios_fwaddrmappings); + } else + map->fw_addr[idx] = fw_addr; + spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); +} + +resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) +{ + unsigned long flags; + struct pcibios_fwaddrmap *map; + resource_size_t fw_addr = 0; + + if (pcibios_fw_addr_done) + return 0; + + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); + map = pcibios_fwaddrmap_lookup(dev); + if (map) + fw_addr = map->fw_addr[idx]; + spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); + + return fw_addr; +} + +static void __init pcibios_fw_addr_list_del(void) +{ + unsigned long flags; + struct pcibios_fwaddrmap *entry, *next; + + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); + list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) { + list_del(&entry->list); + pci_dev_put(entry->dev); + kfree(entry); + } + spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); + pcibios_fw_addr_done = true; +} + +static int +skip_isa_ioresource_align(struct pci_dev *dev) { + + if ((pci_probe & PCI_CAN_SKIP_ISA_ALIGN) && + !(dev->bus->bridge_ctl & 
PCI_BRIDGE_CTL_ISA)) + return 1; + return 0; +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have been mirrored at 0x0100-0x03ff. + */ +resource_size_t +pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + if (skip_isa_ioresource_align(dev)) + return start; + if (start & 0x300) + start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* The low 1MB range is reserved for ISA cards */ + if (start < BIOS_END) + start = BIOS_END; + } + return start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong.
+ * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * - bad resource sizes or overlaps with other regions + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void pcibios_allocate_bridge_resources(struct pci_dev *dev) +{ + int idx; + struct resource *r; + + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->flags) + continue; + if (r->parent) /* Already allocated */ + continue; + if (!r->start || pci_claim_bridge_resource(dev, idx) < 0) { + /* + * Something is wrong with the region. + * Invalidate the resource to prevent + * child resource allocations in this + * range. 
+ */ + r->start = r->end = 0; + r->flags = 0; + } + } +} + +static void pcibios_allocate_bus_resources(struct pci_bus *bus) +{ + struct pci_bus *child; + + /* Depth-First Search on bus tree */ + if (bus->self) + pcibios_allocate_bridge_resources(bus->self); + list_for_each_entry(child, &bus->children, node) + pcibios_allocate_bus_resources(child); +} + +struct pci_check_idx_range { + int start; + int end; +}; + +static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) +{ + int idx, disabled, i; + u16 command; + struct resource *r; + + struct pci_check_idx_range idx_range[] = { + { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, +#ifdef CONFIG_PCI_IOV + { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, +#endif + }; + + pci_read_config_word(dev, PCI_COMMAND, &command); + for (i = 0; i < ARRAY_SIZE(idx_range); i++) + for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + dev_dbg(&dev->dev, + "BAR %d: reserving %pr (d=%d, p=%d)\n", + idx, r, disabled, pass); + if (pci_claim_resource(dev, idx) < 0) { + if (r->flags & IORESOURCE_PCI_FIXED) { + dev_info(&dev->dev, "BAR %d %pR is immovable\n", + idx, r); + } else { + /* We'll assign a new address later */ + pcibios_save_fw_addr(dev, + idx, r->start); + r->end -= r->start; + r->start = 0; + } + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & IORESOURCE_ROM_ENABLE) { + /* Turn the ROM off, leave the resource region, + * but keep it unregistered. 
*/ + u32 reg; + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); + r->flags &= ~IORESOURCE_ROM_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, &reg); + pci_write_config_dword(dev, dev->rom_base_reg, + reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } +} + +static void pcibios_allocate_resources(struct pci_bus *bus, int pass) +{ + struct pci_dev *dev; + struct pci_bus *child; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pcibios_allocate_dev_resources(dev, pass); + + child = dev->subordinate; + if (child) + pcibios_allocate_resources(child, pass); + } +} + +static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev) +{ + struct resource *r; + + /* + * Try to use BIOS settings for ROMs, otherwise let + * pci_assign_unassigned_resources() allocate the new + * addresses. + */ + r = &dev->resource[PCI_ROM_RESOURCE]; + if (!r->flags || !r->start) + return; + if (r->parent) /* Already allocated */ + return; + + if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { + r->end -= r->start; + r->start = 0; + } +} +static void pcibios_allocate_rom_resources(struct pci_bus *bus) +{ + struct pci_dev *dev; + struct pci_bus *child; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pcibios_allocate_dev_rom_resource(dev); + + child = dev->subordinate; + if (child) + pcibios_allocate_rom_resources(child); + } +} + +static int __init pcibios_assign_resources(void) +{ + struct pci_bus *bus; + + if (!(pci_probe & PCI_ASSIGN_ROMS)) + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_rom_resources(bus); + + pci_assign_unassigned_resources(); + pcibios_fw_addr_list_del(); + + return 0; +} + +/** + * called in fs_initcall (one below subsys_initcall), + * give a chance for motherboard reserve resources + */ +fs_initcall(pcibios_assign_resources); + +void pcibios_resource_survey_bus(struct pci_bus *bus) +{ + dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n"); + + pcibios_allocate_bus_resources(bus); + + pcibios_allocate_resources(bus, 0); +
pcibios_allocate_resources(bus, 1); + + if (!(pci_probe & PCI_ASSIGN_ROMS)) + pcibios_allocate_rom_resources(bus); +} + +void __init pcibios_resource_survey(void) +{ + struct pci_bus *bus; + + DBG("PCI: Allocating resources\n"); + + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_bus_resources(bus); + + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_resources(bus, 0); + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_resources(bus, 1); + + e820_reserve_resources_late(); + /* + * Insert the IO APIC resources after PCI initialization has + * occurred to handle IO APICS that are mapped in on a BAR in + * PCI space, but before trying to assign unassigned pci res. + */ + ioapic_insert_resources(); +} + +static const struct vm_operations_struct pci_mmap_ops = { + .access = generic_access_phys, +}; + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long prot; + + /* I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + prot = pgprot_val(vma->vm_page_prot); + + /* + * Return error if pat is not enabled and write_combine is requested. + * Caller can followup with UC MINUS request and add a WC mtrr if there + * is a free mtrr slot. + */ + if (!pat_enabled && write_combine) + return -EINVAL; + + if (pat_enabled && write_combine) + prot |= cachemode2protval(_PAGE_CACHE_MODE_WC); + else if (pat_enabled || boot_cpu_data.x86 > 3) + /* + * ioremap() and ioremap_nocache() defaults to UC MINUS for now. + * To avoid attribute conflicts, request UC MINUS here + * as well. 
+ */ + prot |= cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS); + + vma->vm_page_prot = __pgprot(prot); + + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + vma->vm_ops = &pci_mmap_ops; + + return 0; +} diff --git a/kernel/arch/x86/pci/init.c b/kernel/arch/x86/pci/init.c new file mode 100644 index 000000000..adb62aaa7 --- /dev/null +++ b/kernel/arch/x86/pci/init.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include + +/* arch_initcall has too random ordering, so call the initializers + in the right sequence from here. */ +static __init int pci_arch_init(void) +{ +#ifdef CONFIG_PCI_DIRECT + int type = 0; + + type = pci_direct_probe(); +#endif + + if (!(pci_probe & PCI_PROBE_NOEARLY)) + pci_mmcfg_early_init(); + + if (x86_init.pci.arch_init && !x86_init.pci.arch_init()) + return 0; + +#ifdef CONFIG_PCI_BIOS + pci_pcbios_init(); +#endif + /* + * don't check for raw_pci_ops here because we want pcbios as last + * fallback, yet it's needed to run first to set pcibios_last_bus + * in case legacy PCI probing is used. otherwise detecting peer busses + * fails. 
+ */ +#ifdef CONFIG_PCI_DIRECT + pci_direct_init(type); +#endif + if (!raw_pci_ops && !raw_pci_ext_ops) + printk(KERN_ERR + "PCI: Fatal: No config space access function found\n"); + + dmi_check_pciprobe(); + + dmi_check_skip_isa_align(); + + return 0; +} +arch_initcall(pci_arch_init); diff --git a/kernel/arch/x86/pci/intel_mid_pci.c b/kernel/arch/x86/pci/intel_mid_pci.c new file mode 100644 index 000000000..852aa4c92 --- /dev/null +++ b/kernel/arch/x86/pci/intel_mid_pci.c @@ -0,0 +1,326 @@ +/* + * Intel MID PCI support + * Copyright (c) 2008 Intel Corporation + * Jesse Barnes + * + * Moorestown has an interesting PCI implementation: + * - configuration space is memory mapped (as defined by MCFG) + * - Lincroft devices also have a real, type 1 configuration space + * - Early Lincroft silicon has a type 1 access bug that will cause + * a hang if non-existent devices are accessed + * - some devices have the "fixed BAR" capability, which means + * they can't be relocated or modified; check for that during + * BAR sizing + * + * So, we use the MCFG space for all reads and writes, but also send + * Lincroft writes to type 1 space. But only read/write if the device + * actually exists, otherwise return all 1s for reads and bit bucket + * the writes. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define PCIE_CAP_OFFSET 0x100 + +/* Fixed BAR fields */ +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ +#define PCI_FIXED_BAR_0_SIZE 0x04 +#define PCI_FIXED_BAR_1_SIZE 0x08 +#define PCI_FIXED_BAR_2_SIZE 0x0c +#define PCI_FIXED_BAR_3_SIZE 0x10 +#define PCI_FIXED_BAR_4_SIZE 0x14 +#define PCI_FIXED_BAR_5_SIZE 0x1c + +static int pci_soc_mode; + +/** + * fixed_bar_cap - return the offset of the fixed BAR cap if found + * @bus: PCI bus + * @devfn: device in question + * + * Look for the fixed BAR cap on @bus and @devfn, returning its offset + * if found or 0 otherwise. 
+ */ +static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) +{ + int pos; + u32 pcie_cap = 0, cap_data; + + pos = PCIE_CAP_OFFSET; + + if (!raw_pci_ext_ops) + return 0; + + while (pos) { + if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos, 4, &pcie_cap)) + return 0; + + if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 || + PCI_EXT_CAP_ID(pcie_cap) == 0xffff) + break; + + if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { + raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos + 4, 4, &cap_data); + if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) + return pos; + } + + pos = PCI_EXT_CAP_NEXT(pcie_cap); + } + + return 0; +} + +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 val, int offset) +{ + u32 size; + unsigned int domain, busnum; + int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; + + domain = pci_domain_nr(bus); + busnum = bus->number; + + if (val == ~0 && len == 4) { + unsigned long decode; + + raw_pci_ext_ops->read(domain, busnum, devfn, + offset + 8 + (bar * 4), 4, &size); + + /* Turn the size into a decode pattern for the sizing code */ + if (size) { + decode = size - 1; + decode |= decode >> 1; + decode |= decode >> 2; + decode |= decode >> 4; + decode |= decode >> 8; + decode |= decode >> 16; + decode++; + decode = ~(decode - 1); + } else { + decode = 0; + } + + /* + * If val is all ones, the core code is trying to size the reg, + * so update the mmconfig space with the real size. + * + * Note: this assumes the fixed size we got is a power of two. + */ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, + decode); + } + + /* This is some other kind of BAR write, so just do it. 
*/ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); +} + +/** + * type1_access_ok - check whether to use type 1 + * @bus: bus number + * @devfn: device & function in question + * @reg: configuration register offset being accessed + * + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at + * all, then we can go ahead with any reads & writes. If it's on a Lincroft, + * but doesn't exist, avoid the access altogether to keep the chip from + * hanging. + */ +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) +{ + /* + * This is a workaround for A0 LNC bug where PCI status register does + * not have new CAP bit set. It cannot be written by SW either. + * + * PCI header type in real LNC indicates a single function device, this + * will prevent probing other devices under the same function in PCI + * shim. Therefore, use the header type in shim instead. + */ + if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) + return false; + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) + || devfn == PCI_DEVFN(0, 0) + || devfn == PCI_DEVFN(3, 0))) + return true; + return false; /* Langwell on others */ +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + int offset; + + /* + * On MRST, there is no PCI ROM BAR, this will cause a subsequent read + * to ROM BAR return 0 then being ignored.
+ */ + if (where == PCI_ROM_ADDRESS) + return 0; + + /* + * Devices with fixed BARs need special handling: + * - BAR sizing code will save, write ~0, read size, restore + * - so writes to fixed BARs need special handling + * - other writes to fixed BAR devices should go through mmconfig + */ + offset = fixed_bar_cap(bus, devfn); + if (offset && + (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { + return pci_device_update_fixed(bus, devfn, where, size, value, + offset); + } + + /* + * On Moorestown update both real & mmconfig space + * Note: early Lincroft silicon can't handle type 1 accesses to + * non-existent devices, so just eat the write in that case. + */ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, + where, size, value); +} + +static int intel_mid_pci_irq_enable(struct pci_dev *dev) +{ + int polarity; + + if (dev->irq_managed && dev->irq > 0) + return 0; + + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) + polarity = 0; /* active high */ + else + polarity = 1; /* active low */ + + /* + * MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to + * IOAPIC RTE entries, so we just enable RTE for the device. + */ + if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev))) + return -EBUSY; + if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0) + return -EBUSY; + + dev->irq_managed = 1; + + return 0; +} + +static void intel_mid_pci_irq_disable(struct pci_dev *dev) +{ + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { + mp_unmap_irq(dev->irq); + dev->irq_managed = 0; + } +} + +struct pci_ops intel_mid_pci_ops = { + .read = pci_read, + .write = pci_write, +}; + +/** + * intel_mid_pci_init - installs intel_mid_pci_ops + * + * Moorestown has an interesting PCI implementation (see above). 
+ * Called when the early platform detection installs it. + */ +int __init intel_mid_pci_init(void) +{ + pr_info("Intel MID platform detected, using MID PCI ops\n"); + pci_mmcfg_late_init(); + pcibios_enable_irq = intel_mid_pci_irq_enable; + pcibios_disable_irq = intel_mid_pci_irq_disable; + pci_root_ops = intel_mid_pci_ops; + pci_soc_mode = 1; + /* Continue with standard init */ + return 1; +} + +/* + * Langwell devices are not true PCI devices; they are not subject to 10 ms + * d3 to d0 delay required by PCI spec. + */ +static void pci_d3delay_fixup(struct pci_dev *dev) +{ + /* + * PCI fixups are effectively decided compile time. If we have a dual + * SoC/non-SoC kernel we don't want to mangle d3 on non-SoC devices. + */ + if (!pci_soc_mode) + return; + /* + * True PCI devices in Lincroft should allow type 1 access, the rest + * are Langwell fake PCI devices. + */ + if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID)) + return; + dev->d3_delay = 0; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); + +static void mrst_power_off_unused_dev(struct pci_dev *dev) +{ + pci_set_power_state(dev, PCI_D3hot); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); + +/* + * Langwell devices reside at fixed offsets, don't try to move them. 
+ */ +static void pci_fixed_bar_fixup(struct pci_dev *dev) +{ + unsigned long offset; + u32 size; + int i; + + if (!pci_soc_mode) + return; + + /* Must have extended configuration space */ + if (dev->cfg_size < PCIE_CAP_OFFSET + 4) + return; + + /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ + offset = fixed_bar_cap(dev->bus, dev->devfn); + if (!offset || PCI_DEVFN(2, 0) == dev->devfn || + PCI_DEVFN(2, 2) == dev->devfn) + return; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + pci_read_config_dword(dev, offset + 8 + (i * 4), &size); + dev->resource[i].end = dev->resource[i].start + size - 1; + dev->resource[i].flags |= IORESOURCE_PCI_FIXED; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup); diff --git a/kernel/arch/x86/pci/irq.c b/kernel/arch/x86/pci/irq.c new file mode 100644 index 000000000..5dc6ca5e1 --- /dev/null +++ b/kernel/arch/x86/pci/irq.c @@ -0,0 +1,1279 @@ +/* + * Low-Level PCI Support for PC -- Routing of Interrupts + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) +#define PIRQ_VERSION 0x0100 + +static int broken_hp_bios_irq9; +static int acer_tm360_irqrouting; + +static struct irq_routing_table *pirq_table; + +static int pirq_enable_irq(struct pci_dev *dev); +static void pirq_disable_irq(struct pci_dev *dev); + +/* + * Never use: 0, 1, 2 (timer, keyboard, and cascade) + * Avoid using: 13, 14 and 15 (FP error and IDE). 
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) + */ +unsigned int pcibios_irq_mask = 0xfff8; + +static int pirq_penalty[16] = { + 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, + 0, 0, 0, 0, 1000, 100000, 100000, 100000 +}; + +struct irq_router { + char *name; + u16 vendor, device; + int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, + int new); +}; + +struct irq_router_handler { + u16 vendor; + int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); +}; + +int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; +void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq; + +/* + * Check passed address for the PCI IRQ Routing Table signature + * and perform checksum verification. + */ + +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) +{ + struct irq_routing_table *rt; + int i; + u8 sum; + + rt = (struct irq_routing_table *) addr; + if (rt->signature != PIRQ_SIGNATURE || + rt->version != PIRQ_VERSION || + rt->size % 16 || + rt->size < sizeof(struct irq_routing_table)) + return NULL; + sum = 0; + for (i = 0; i < rt->size; i++) + sum += addr[i]; + if (!sum) { + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", + rt); + return rt; + } + return NULL; +} + + + +/* + * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. + */ + +static struct irq_routing_table * __init pirq_find_routing_table(void) +{ + u8 *addr; + struct irq_routing_table *rt; + + if (pirq_table_addr) { + rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr)); + if (rt) + return rt; + printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); + } + for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + rt = pirq_check_routing_table(addr); + if (rt) + return rt; + } + return NULL; +} + +/* + * If we have a IRQ routing table, use it to search for peer host + * bridges. 
It's a gross hack, but since there are no other known + * ways how to get a list of buses, we have to go this way. + */ + +static void __init pirq_peer_trick(void) +{ + struct irq_routing_table *rt = pirq_table; + u8 busmap[256]; + int i; + struct irq_info *e; + + memset(busmap, 0, sizeof(busmap)); + for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + e = &rt->slots[i]; +#ifdef DEBUG + { + int j; + DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); + for (j = 0; j < 4; j++) + DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); + DBG("\n"); + } +#endif + busmap[e->bus] = 1; + } + for (i = 1; i < 256; i++) { + if (!busmap[i] || pci_find_bus(0, i)) + continue; + pcibios_scan_root(i); + } + pcibios_last_bus = -1; +} + +/* + * Code for querying and setting of IRQ routes on various interrupt routers. + */ + +void eisa_set_level_irq(unsigned int irq) +{ + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val; + static u16 eisa_irq_mask; + + if (irq >= 16 || (1 << irq) & eisa_irq_mask) + return; + + eisa_irq_mask |= (1 << irq); + printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); + val = inb(port); + if (!(val & mask)) { + DBG(KERN_DEBUG " -> edge"); + outb(val | mask, port); + } +} + +/* + * Common IRQ routing practice: nibbles in config space, + * offset by some magic constant. + */ +static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + return (nr & 1) ? (x >> 4) : (x & 0xf); +} + +static void write_config_nybble(struct pci_dev *router, unsigned offset, + unsigned nr, unsigned int val) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + x = (nr & 1) ? 
((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); + pci_write_config_byte(router, reg, x); +} + +/* + * ALI pirq entries are damn ugly, and completely undocumented. + * This has been figured out from pirq tables, and it's not a pretty + * picture. + */ +static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; + + WARN_ON_ONCE(pirq > 16); + return irqmap[read_config_nybble(router, 0x48, pirq-1)]; +} + +static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; + unsigned int val = irqmap[irq]; + + WARN_ON_ONCE(pirq > 16); + if (val) { + write_config_nybble(router, 0x48, pirq-1, val); + return 1; + } + return 0; +} + +/* + * The Intel PIIX4 pirq rules are fairly simple: "pirq" is + * just a pointer to the config space. + */ +static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + + pci_read_config_byte(router, pirq, &x); + return (x < 16) ? x : 0; +} + +static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + pci_write_config_byte(router, pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower 4 bits. + */ +static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); +} + +static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, for 82C586, nibble map is different . 
+ */ +static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; + + WARN_ON_ONCE(pirq > 5); + return read_config_nybble(router, 0x55, pirqmap[pirq-1]); +} + +static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; + + WARN_ON_ONCE(pirq > 5); + write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); + return 1; +} + +/* + * ITE 8330G pirq rules are nibble-based + * FIXME: pirqmap may be { 1, 0, 3, 2 }, + * 2+3 are both mapped to irq 9 on my system + */ +static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + + WARN_ON_ONCE(pirq > 4); + return read_config_nybble(router, 0x43, pirqmap[pirq-1]); +} + +static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + + WARN_ON_ONCE(pirq > 4); + write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); + return 1; +} + +/* + * OPTI: high four bits are nibble pointer.. + * I wonder what the low bits do? 
+ */ +static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0xb8, pirq >> 4); +} + +static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0xb8, pirq >> 4, irq); + return 1; +} + +/* + * Cyrix: nibble offset 0x5C + * 0x5C bits 7:4 is INTB bits 3:0 is INTA + * 0x5D bits 7:4 is INTD bits 3:0 is INTC + */ +static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x5C, (pirq-1)^1); +} + +static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x5C, (pirq-1)^1, irq); + return 1; +} + +/* + * PIRQ routing for SiS 85C503 router used in several SiS chipsets. + * We have to deal with the following issues here: + * - vendors have different ideas about the meaning of link values + * - some onboard devices (integrated in the chipset) have special + * links and are thus routed differently (i.e. not via PCI INTA-INTD) + * - different revision of the router have a different layout for + * the routing registers, particularly for the onchip devices + * + * For all routing registers the common thing is we have one byte + * per routeable link which is defined as: + * bit 7 IRQ mapping enabled (0) or disabled (1) + * bits [6:4] reserved (sometimes used for onchip devices) + * bits [3:0] IRQ to map to + * allowed: 3-7, 9-12, 14-15 + * reserved: 0, 1, 2, 8, 13 + * + * The config-space registers located at 0x41/0x42/0x43/0x44 are + * always used to route the normal PCI INT A/B/C/D respectively. + * Apparently there are systems implementing PCI routing table using + * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. + * We try our best to handle both link mappings. + * + * Currently (2003-05-21) it appears most SiS chipsets follow the + * definition of routing registers from the SiS-5595 southbridge. 
+ * According to the SiS 5595 datasheets the revision id's of the + * router (ISA-bridge) should be 0x01 or 0xb0. + * + * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1. + * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets. + * They seem to work with the current routing code. However there is + * some concern because of the two USB-OHCI HCs (original SiS 5595 + * had only one). YMMV. + * + * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1: + * + * 0x61: IDEIRQ: + * bits [6:5] must be written 01 + * bit 4 channel-select primary (0), secondary (1) + * + * 0x62: USBIRQ: + * bit 6 OHCI function disabled (0), enabled (1) + * + * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved + * + * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved + * + * We support USBIRQ (in addition to INTA-INTD) and keep the + * IDE, ACPI and DAQ routing untouched as set by the BIOS. + * + * Currently the only reported exception is the new SiS 65x chipset + * which includes the SiS 69x southbridge. Here we have the 85C503 + * router revision 0x04 and there are changes in the register layout + * mostly related to the different USB HCs with USB 2.0 support. + * + * Onchip routing for router rev-id 0x04 (try-and-error observation) + * + * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs + * bit 6-4 are probably unused, not like 5595 + */ + +#define PIRQ_SIS_IRQ_MASK 0x0f +#define PIRQ_SIS_IRQ_DISABLE 0x80 +#define PIRQ_SIS_USB_ENABLE 0x40 + +static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + int reg; + + reg = pirq; + if (reg >= 0x01 && reg <= 0x04) + reg += 0x40; + pci_read_config_byte(router, reg, &x); + return (x & PIRQ_SIS_IRQ_DISABLE) ? 
0 : (x & PIRQ_SIS_IRQ_MASK); +} + +static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + u8 x; + int reg; + + reg = pirq; + if (reg >= 0x01 && reg <= 0x04) + reg += 0x40; + pci_read_config_byte(router, reg, &x); + x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE); + x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE; + pci_write_config_byte(router, reg, x); + return 1; +} + + +/* + * VLSI: nibble offset 0x74 - educated guess due to routing table and + * config space of VLSI 82C534 PCI-bridge/router (1004:0102) + * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard + * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 + * for the busbridge to the docking station. + */ + +static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + WARN_ON_ONCE(pirq >= 9); + if (pirq > 8) { + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); + return 0; + } + return read_config_nybble(router, 0x74, pirq-1); +} + +static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + WARN_ON_ONCE(pirq >= 9); + if (pirq > 8) { + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); + return 0; + } + write_config_nybble(router, 0x74, pirq-1, irq); + return 1; +} + +/* + * ServerWorks: PCI interrupts mapped to system IRQ lines through Index + * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register + * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect + * register is a straight binary coding of desired PIC IRQ (low nibble). + * + * The 'link' value in the PIRQ table is already in the correct format + * for the Index register. There are some special index values: + * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, + * and 0x03 for SMBus. 
+ */ +static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(pirq, 0xc00); + return inb(0xc01) & 0xf; +} + +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, + int pirq, int irq) +{ + outb(pirq, 0xc00); + outb(irq, 0xc01); + return 1; +} + +/* Support for AMD756 PCI IRQ Routing + * Jhon H. Caicedo + * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced) + * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) + * The AMD756 pirq rules are nibble-based + * offset 0x56 0-3 PIRQA 4-7 PIRQB + * offset 0x57 0-3 PIRQC 4-7 PIRQD + */ +static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 irq; + irq = 0; + if (pirq <= 4) + irq = read_config_nybble(router, 0x56, pirq - 1); + dev_info(&dev->dev, + "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n", + dev->vendor, dev->device, pirq, irq); + return irq; +} + +static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + dev_info(&dev->dev, + "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n", + dev->vendor, dev->device, pirq, irq); + if (pirq <= 4) + write_config_nybble(router, 0x56, pirq - 1, irq); + return 1; +} + +/* + * PicoPower PT86C523 + */ +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(0x10 + ((pirq - 1) >> 1), 0x24); + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf); +} + +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, + int irq) +{ + unsigned int x; + outb(0x10 + ((pirq - 1) >> 1), 0x24); + x = inb(0x26); + x = ((pirq - 1) & 1) ? 
((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq)); + outb(x, 0x26); + return 1; +} + +#ifdef CONFIG_PCI_BIOS + +static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + struct pci_dev *bridge; + int pin = pci_get_interrupt_pin(dev, &bridge); + return pcibios_set_irq_routing(bridge, pin - 1, irq); +} + +#endif + +static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + static struct pci_device_id __initdata pirq_440gx[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, + { }, + }; + + /* 440GX has a proprietary PIRQ router -- don't use it */ + if (pci_dev_present(pirq_440gx)) + return 0; + + switch (device) { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_1: + case PCI_DEVICE_ID_INTEL_ICH6_0: + case PCI_DEVICE_ID_INTEL_ICH6_1: + case PCI_DEVICE_ID_INTEL_ICH7_0: + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_TGP_LPC: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: + case PCI_DEVICE_ID_INTEL_ICH9_0: + case PCI_DEVICE_ID_INTEL_ICH9_1: + case PCI_DEVICE_ID_INTEL_ICH9_2: + case PCI_DEVICE_ID_INTEL_ICH9_3: + case PCI_DEVICE_ID_INTEL_ICH9_4: + case 
PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_EP80579_0: + case PCI_DEVICE_ID_INTEL_ICH10_0: + case PCI_DEVICE_ID_INTEL_ICH10_1: + case PCI_DEVICE_ID_INTEL_ICH10_2: + case PCI_DEVICE_ID_INTEL_ICH10_3: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + + if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX) + || (device >= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN && + device <= PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + + return 0; +} + +static __init int via_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) +{ + /* FIXME: We should move some of the quirk fixup stuff here */ + + /* + * workarounds for some buggy BIOSes + */ + if (device == PCI_DEVICE_ID_VIA_82C586_0) { + switch (router->device) { + case PCI_DEVICE_ID_VIA_82C686: + /* + * Asus k7m bios wrongly reports 82C686A + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_82C686; + break; + case PCI_DEVICE_ID_VIA_8235: + /** + * Asus a7v-x bios wrongly reports 8235 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8235; + break; + case PCI_DEVICE_ID_VIA_8237: + /** + * Asus a7v600 bios wrongly reports 8237 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8237; + break; + } + } + + switch (device) { + case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; + case PCI_DEVICE_ID_VIA_82C596: + case PCI_DEVICE_ID_VIA_82C686: + case PCI_DEVICE_ID_VIA_8231: + case PCI_DEVICE_ID_VIA_8233A: + case 
PCI_DEVICE_ID_VIA_8235: + case PCI_DEVICE_ID_VIA_8237: + /* FIXME: add new ones for 8233/5 */ + r->name = "VIA"; + r->get = pirq_via_get; + r->set = pirq_via_set; + return 1; + } + return 0; +} + +static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; + } + return 0; +} + + +static __init int serverworks_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; + } + return 0; +} + +static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + if (device != PCI_DEVICE_ID_SI_503) + return 0; + + r->name = "SIS"; + r->get = pirq_sis_get; + r->set = pirq_sis_set; + return 1; +} + +static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; + } + return 0; +} + +static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; + } + return 0; +} + +static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; + } + return 0; +} + +static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_AL_M1533: + case PCI_DEVICE_ID_AL_M1563: + r->name = "ALI"; 
+ r->get = pirq_ali_get; + r->set = pirq_ali_set; + return 1; + } + return 0; +} + +static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; + } + r->get = pirq_amd756_get; + r->set = pirq_amd756_set; + return 1; +} + +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_PICOPOWER_PT86C523: + r->name = "PicoPower PT86C523"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP: + r->name = "PicoPower PT86C523 rev. BB+"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + } + return 0; +} + +static __initdata struct irq_router_handler pirq_routers[] = { + { PCI_VENDOR_ID_INTEL, intel_router_probe }, + { PCI_VENDOR_ID_AL, ali_router_probe }, + { PCI_VENDOR_ID_ITE, ite_router_probe }, + { PCI_VENDOR_ID_VIA, via_router_probe }, + { PCI_VENDOR_ID_OPTI, opti_router_probe }, + { PCI_VENDOR_ID_SI, sis_router_probe }, + { PCI_VENDOR_ID_CYRIX, cyrix_router_probe }, + { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, + { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, + { PCI_VENDOR_ID_AMD, amd_router_probe }, + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe }, + /* Someone with docs needs to add the ATI Radeon IGP */ + { 0, NULL } +}; +static struct irq_router pirq_router; +static struct pci_dev *pirq_router_dev; + + +/* + * FIXME: should we have an option to say "generic for + * chipset" ? 
+ */ + +static void __init pirq_find_router(struct irq_router *r) +{ + struct irq_routing_table *rt = pirq_table; + struct irq_router_handler *h; + +#ifdef CONFIG_PCI_BIOS + if (!rt->signature) { + printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); + r->set = pirq_bios_set; + r->name = "BIOS"; + return; + } +#endif + + /* Default unless a driver reloads it */ + r->name = "default"; + r->get = NULL; + r->set = NULL; + + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", + rt->rtr_vendor, rt->rtr_device); + + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); + if (!pirq_router_dev) { + DBG(KERN_DEBUG "PCI: Interrupt router not found at " + "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); + return; + } + + for (h = pirq_routers; h->vendor; h++) { + /* First look for a router match */ + if (rt->rtr_vendor == h->vendor && + h->probe(r, pirq_router_dev, rt->rtr_device)) + break; + /* Fall back to a device match */ + if (pirq_router_dev->vendor == h->vendor && + h->probe(r, pirq_router_dev, pirq_router_dev->device)) + break; + } + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n", + pirq_router.name, + pirq_router_dev->vendor, pirq_router_dev->device); + + /* The device remains referenced for the kernel lifetime */ +} + +static struct irq_info *pirq_get_info(struct pci_dev *dev) +{ + struct irq_routing_table *rt = pirq_table; + int entries = (rt->size - sizeof(struct irq_routing_table)) / + sizeof(struct irq_info); + struct irq_info *info; + + for (info = rt->slots; entries--; info++) + if (info->bus == dev->bus->number && + PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + return info; + return NULL; +} + +static int pcibios_lookup_irq(struct pci_dev *dev, int assign) +{ + u8 pin; + struct irq_info *info; + int i, pirq, newirq; + int irq = 0; + u32 mask; + struct irq_router *r = &pirq_router; + struct pci_dev *dev2 = NULL; + char *msg = NULL; + + /* Find IRQ pin */ + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if 
(!pin) { + dev_dbg(&dev->dev, "no interrupt pin\n"); + return 0; + } + + if (io_apic_assign_pci_irqs) + return 0; + + /* Find IRQ routing entry */ + + if (!pirq_table) + return 0; + + info = pirq_get_info(dev); + if (!info) { + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", + 'A' + pin - 1); + return 0; + } + pirq = info->irq[pin - 1].link; + mask = info->irq[pin - 1].bitmap; + if (!pirq) { + dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1); + return 0; + } + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", + 'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs); + mask &= pcibios_irq_mask; + + /* Work around broken HP Pavilion Notebooks which assign USB to + IRQ 9 even though it is actually wired to IRQ 11 */ + + if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { + dev->irq = 11; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); + r->set(pirq_router_dev, dev, pirq, 11); + } + + /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ + if (acer_tm360_irqrouting && dev->irq == 11 && + dev->vendor == PCI_VENDOR_ID_O2) { + pirq = 0x68; + mask = 0x400; + dev->irq = r->get(pirq_router_dev, dev, pirq); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + + /* + * Find the best IRQ to assign: use the one + * reported by the device if possible. 
+ */ + newirq = dev->irq; + if (newirq && !((1 << newirq) & mask)) { + if (pci_probe & PCI_USE_PIRQ_MASK) + newirq = 0; + else + dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " + "%#x; try pci=usepirqmask\n", newirq, mask); + } + if (!newirq && assign) { + for (i = 0; i < 16; i++) { + if (!(mask & (1 << i))) + continue; + if (pirq_penalty[i] < pirq_penalty[newirq] && + can_request_irq(i, IRQF_SHARED)) + newirq = i; + } + } + dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq); + + /* Check if it is hardcoded */ + if ((pirq & 0xf0) == 0xf0) { + irq = pirq & 0xf; + msg = "hardcoded"; + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { + msg = "found"; + eisa_set_level_irq(irq); + } else if (newirq && r->set && + (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + if (r->set(pirq_router_dev, dev, pirq, newirq)) { + eisa_set_level_irq(newirq); + msg = "assigned"; + irq = newirq; + } + } + + if (!irq) { + if (newirq && mask == (1 << newirq)) { + msg = "guessed"; + irq = newirq; + } else { + dev_dbg(&dev->dev, "can't route interrupt\n"); + return 0; + } + } + dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); + + /* Update IRQ for all devices with the same pirq value */ + for_each_pci_dev(dev2) { + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; + + info = pirq_get_info(dev2); + if (!info) + continue; + if (info->irq[pin - 1].link == pirq) { + /* + * We refuse to override the dev->irq + * information. Give a warning! 
+ */ + if (dev2->irq && dev2->irq != irq && \ + (!(pci_probe & PCI_USE_PIRQ_MASK) || \ + ((1 << dev2->irq) & mask))) { +#ifndef CONFIG_PCI_MSI + dev_info(&dev2->dev, "IRQ routing conflict: " + "have IRQ %d, want IRQ %d\n", + dev2->irq, irq); +#endif + continue; + } + dev2->irq = irq; + pirq_penalty[irq]++; + if (dev != dev2) + dev_info(&dev->dev, "sharing IRQ %d with %s\n", + irq, pci_name(dev2)); + } + } + return 1; +} + +void __init pcibios_fixup_irqs(void) +{ + struct pci_dev *dev = NULL; + u8 pin; + + DBG(KERN_DEBUG "PCI: IRQ fixup\n"); + for_each_pci_dev(dev) { + /* + * If the BIOS has set an out of range IRQ number, just + * ignore it. Also keep track of which IRQ's are + * already in use. + */ + if (dev->irq >= 16) { + dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); + dev->irq = 0; + } + /* + * If the IRQ is already assigned to a PCI device, + * ignore its ISA use penalty + */ + if (pirq_penalty[dev->irq] >= 100 && + pirq_penalty[dev->irq] < 100000) + pirq_penalty[dev->irq] = 0; + pirq_penalty[dev->irq]++; + } + + if (io_apic_assign_pci_irqs) + return; + + dev = NULL; + for_each_pci_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; + + /* + * Still no IRQ? Try to lookup one... 
+ */ + if (!dev->irq) + pcibios_lookup_irq(dev, 0); + } +} + +/* + * Work around broken HP Pavilion Notebooks which assign USB to + * IRQ 9 even though it is actually wired to IRQ 11 + */ +static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) +{ + if (!broken_hp_bios_irq9) { + broken_hp_bios_irq9 = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); + } + return 0; +} + +/* + * Work around broken Acer TravelMate 360 Notebooks which assign + * Cardbus to IRQ 11 even though it is actually wired to IRQ 10 + */ +static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) +{ + if (!acer_tm360_irqrouting) { + acer_tm360_irqrouting = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", + d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata pciirq_dmi_table[] = { + { + .callback = fix_broken_hp_bios_irq9, + .ident = "HP Pavilion N5400 Series Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), + DMI_MATCH(DMI_PRODUCT_VERSION, + "HP Pavilion Notebook Model GE"), + DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), + }, + }, + { + .callback = fix_acer_tm360_irqrouting, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + { } +}; + +void __init pcibios_irq_init(void) +{ + DBG(KERN_DEBUG "PCI: IRQ init\n"); + + if (raw_pci_ops == NULL) + return; + + dmi_check_system(pciirq_dmi_table); + + pirq_table = pirq_find_routing_table(); + +#ifdef CONFIG_PCI_BIOS + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + pirq_table = pcibios_get_irq_routing_table(); +#endif + if (pirq_table) { + pirq_peer_trick(); + pirq_find_router(&pirq_router); + if (pirq_table->exclusive_irqs) { + int i; + for (i = 0; i < 16; i++) + if (!(pirq_table->exclusive_irqs & (1 << i))) + pirq_penalty[i] += 100; + } + /* + * If we're using the I/O APIC, 
avoid using the PCI IRQ + * routing table + */ + if (io_apic_assign_pci_irqs) + pirq_table = NULL; + } + + x86_init.pci.fixup_irqs(); + + if (io_apic_assign_pci_irqs && pci_routeirq) { + struct pci_dev *dev = NULL; + /* + * PCI IRQ routing is set up by pci_enable_device(), but we + * also do it here in case there are still broken drivers that + * don't use pci_enable_device(). + */ + printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + for_each_pci_dev(dev) + pirq_enable_irq(dev); + } +} + +static void pirq_penalize_isa_irq(int irq, int active) +{ + /* + * If any ISAPnP device reports an IRQ in its list of possible + * IRQ's, we try to avoid assigning it to PCI devices. + */ + if (irq < 16) { + if (active) + pirq_penalty[irq] += 1000; + else + pirq_penalty[irq] += 100; + } +} + +void pcibios_penalize_isa_irq(int irq, int active) +{ +#ifdef CONFIG_ACPI + if (!acpi_noirq) + acpi_penalize_isa_irq(irq, active); + else +#endif + pirq_penalize_isa_irq(irq, active); +} + +static int pirq_enable_irq(struct pci_dev *dev) +{ + u8 pin = 0; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin && !pcibios_lookup_irq(dev, 1)) { + char *msg = ""; + + if (!io_apic_assign_pci_irqs && dev->irq) + return 0; + + if (io_apic_assign_pci_irqs) { +#ifdef CONFIG_X86_IO_APIC + struct pci_dev *temp_dev; + int irq; + + if (dev->irq_managed && dev->irq > 0) + return 0; + + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), pin - 1); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code detects such bridged + * busses itself so we should get into this branch reliably. 
+ */ + temp_dev = dev; + while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; + + pin = pci_swizzle_interrupt_pin(dev, pin); + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), + pin - 1); + if (irq >= 0) + dev_warn(&dev->dev, "using bridge %s " + "INT %c to get IRQ %d\n", + pci_name(bridge), 'A' + pin - 1, + irq); + dev = bridge; + } + dev = temp_dev; + if (irq >= 0) { + dev->irq_managed = 1; + dev->irq = irq; + dev_info(&dev->dev, "PCI->APIC IRQ transform: " + "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); + return 0; + } else + msg = "; probably buggy MP table"; +#endif + } else if (pci_probe & PCI_BIOS_IRQ_SCAN) + msg = ""; + else + msg = "; please try using pci=biosirq"; + + /* + * With IDE legacy devices the IRQ lookup failure is not + * a problem.. + */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && + !(dev->class & 0x5)) + return 0; + + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", + 'A' + pin - 1, msg); + } + return 0; +} + +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + +static void pirq_disable_irq(struct pci_dev *dev) +{ + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && + dev->irq_managed && dev->irq) { + mp_unmap_irq(dev->irq); + dev->irq = 0; + dev->irq_managed = 0; + } +} diff --git a/kernel/arch/x86/pci/legacy.c b/kernel/arch/x86/pci/legacy.c new file mode 100644 index 000000000..5b662c0fa --- /dev/null +++ b/kernel/arch/x86/pci/legacy.c @@ -0,0 +1,72 @@ +/* + * legacy.c - traditional, old school PCI bus probing + */ +#include +#include +#include +#include + +/* + * Discover remaining PCI buses in case there are peer host bridges. + * We use the number of last PCI bus provided by the PCI BIOS. 
+ */ +static void pcibios_fixup_peer_bridges(void) +{ + int n; + + if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff) + return; + DBG("PCI: Peer bridge fixup\n"); + + for (n=0; n <= pcibios_last_bus; n++) + pcibios_scan_specific_bus(n); +} + +int __init pci_legacy_init(void) +{ + if (!raw_pci_ops) { + printk("PCI: System does not support PCI\n"); + return 0; + } + + printk("PCI: Probing PCI hardware\n"); + pcibios_scan_root(0); + return 0; +} + +void pcibios_scan_specific_bus(int busn) +{ + int devfn; + u32 l; + + if (pci_find_bus(0, busn)) + return; + + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); + pcibios_scan_root(busn); + return; + } + } +} +EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); + +int __init pci_subsys_init(void) +{ + /* + * The init function returns an non zero value when + * pci_legacy_init should be invoked. + */ + if (x86_init.pci.init()) + pci_legacy_init(); + + pcibios_fixup_peer_bridges(); + x86_init.pci.init_irq(); + pcibios_init(); + + return 0; +} +subsys_initcall(pci_subsys_init); diff --git a/kernel/arch/x86/pci/mmconfig-shared.c b/kernel/arch/x86/pci/mmconfig-shared.c new file mode 100644 index 000000000..dd30b7e08 --- /dev/null +++ b/kernel/arch/x86/pci/mmconfig-shared.c @@ -0,0 +1,818 @@ +/* + * mmconfig-shared.c - Low-level direct PCI config space access via + * MMCONFIG - common code between i386 and x86-64. + * + * This code does: + * - known chipset handling + * - ACPI decoding and validation + * + * Per-architecture code takes care of the mappings and accesses + * themselves. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PREFIX "PCI: " + +/* Indicate if the mmcfg resources have been placed into the resource table. 
*/ +static bool pci_mmcfg_running_state; +static bool pci_mmcfg_arch_init_failed; +static DEFINE_MUTEX(pci_mmcfg_lock); + +LIST_HEAD(pci_mmcfg_list); + +static void __init pci_mmconfig_remove(struct pci_mmcfg_region *cfg) +{ + if (cfg->res.parent) + release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); +} + +static void __init free_all_mmcfg(void) +{ + struct pci_mmcfg_region *cfg, *tmp; + + pci_mmcfg_arch_free(); + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) + pci_mmconfig_remove(cfg); +} + +static void list_add_sorted(struct pci_mmcfg_region *new) +{ + struct pci_mmcfg_region *cfg; + + /* keep list sorted by segment and starting bus number */ + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) { + if (cfg->segment > new->segment || + (cfg->segment == new->segment && + cfg->start_bus >= new->start_bus)) { + list_add_tail_rcu(&new->list, &cfg->list); + return; + } + } + list_add_tail_rcu(&new->list, &pci_mmcfg_list); +} + +static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start, + int end, u64 addr) +{ + struct pci_mmcfg_region *new; + struct resource *res; + + if (addr == 0) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->address = addr; + new->segment = segment; + new->start_bus = start; + new->end_bus = end; + + res = &new->res; + res->start = addr + PCI_MMCFG_BUS_OFFSET(start); + res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); + res->name = new->name; + + return new; +} + +static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr) +{ + struct pci_mmcfg_region *new; + + new = pci_mmconfig_alloc(segment, start, end, addr); + if (new) { + mutex_lock(&pci_mmcfg_lock); + list_add_sorted(new); + mutex_unlock(&pci_mmcfg_lock); + + pr_info(PREFIX + "MMCONFIG for domain 
%04x [bus %02x-%02x] at %pR " + "(base %#lx)\n", + segment, start, end, &new->res, (unsigned long)addr); + } + + return new; +} + +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) + if (cfg->segment == segment && + cfg->start_bus <= bus && bus <= cfg->end_bus) + return cfg; + + return NULL; +} + +static const char *__init pci_mmcfg_e7520(void) +{ + u32 win; + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); + + win = win & 0xf000; + if (win == 0x0000 || win == 0xf000) + return NULL; + + if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) + return NULL; + + return "Intel Corporation E7520 Memory Controller Hub"; +} + +static const char *__init pci_mmcfg_intel_945(void) +{ + u32 pciexbar, mask = 0, len = 0; + + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); + + /* Enable bit */ + if (!(pciexbar & 1)) + return NULL; + + /* Size bits */ + switch ((pciexbar >> 1) & 3) { + case 0: + mask = 0xf0000000U; + len = 0x10000000U; + break; + case 1: + mask = 0xf8000000U; + len = 0x08000000U; + break; + case 2: + mask = 0xfc000000U; + len = 0x04000000U; + break; + default: + return NULL; + } + + /* Errata #2, things break when not aligned on a 256Mb boundary */ + /* Can only happen in 64M/128M mode */ + + if ((pciexbar & mask) & 0x0fffffffU) + return NULL; + + /* Don't hit the APIC registers and their friends */ + if ((pciexbar & mask) >= 0xf0000000U) + return NULL; + + if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) + return NULL; + + return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; +} + +static const char *__init pci_mmcfg_amd_fam10h(void) +{ + u32 low, high, address; + u64 base, msr; + int i; + unsigned segnbits = 0, busnbits, end_bus; + + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + if (rdmsr_safe(address, &low, &high)) + return NULL; + + msr 
= high; + msr <<= 32; + msr |= low; + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* + * only handle bus 0 ? + * need to skip it + */ + if (!busnbits) + return NULL; + + if (busnbits > 8) { + segnbits = busnbits - 8; + busnbits = 8; + } + + end_bus = (1 << busnbits) - 1; + for (i = 0; i < (1 << segnbits); i++) + if (pci_mmconfig_add(i, 0, end_bus, + base + (1<<28) * i) == NULL) { + free_all_mmcfg(); + return NULL; + } + + return "AMD Family 10h NB"; +} + +static bool __initdata mcp55_checked; +static const char *__init pci_mmcfg_nvidia_mcp55(void) +{ + int bus; + int mcp55_mmconf_found = 0; + + static const u32 extcfg_regnum __initconst = 0x90; + static const u32 extcfg_regsize __initconst = 4; + static const u32 extcfg_enable_mask __initconst = 1 << 31; + static const u32 extcfg_start_mask __initconst = 0xff << 16; + static const int extcfg_start_shift __initconst = 16; + static const u32 extcfg_size_mask __initconst = 0x3 << 28; + static const int extcfg_size_shift __initconst = 28; + static const int extcfg_sizebus[] __initconst = { + 0x100, 0x80, 0x40, 0x20 + }; + static const u32 extcfg_base_mask[] __initconst = { + 0x7ff8, 0x7ffc, 0x7ffe, 0x7fff + }; + static const int extcfg_base_lshift __initconst = 25; + + /* + * do check if amd fam10h already took over + */ + if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) + return NULL; + + mcp55_checked = true; + for (bus = 0; bus < 256; bus++) { + u64 base; + u32 l, extcfg; + u16 vendor, device; + int start, size_index, end; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + + if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device) + continue; + + raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum, + extcfg_regsize, &extcfg); + + if (!(extcfg & extcfg_enable_mask)) + 
continue; + + size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; + base = extcfg & extcfg_base_mask[size_index]; + /* base could > 4G */ + base <<= extcfg_base_lshift; + start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; + end = start + extcfg_sizebus[size_index] - 1; + if (pci_mmconfig_add(0, start, end, base) == NULL) + continue; + mcp55_mmconf_found++; + } + + if (!mcp55_mmconf_found) + return NULL; + + return "nVidia MCP55"; +} + +struct pci_mmcfg_hostbridge_probe { + u32 bus; + u32 devfn; + u32 vendor; + u32 device; + const char *(*probe)(void); +}; + +static const struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initconst = { + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0x18, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, + { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA, + 0x0369, pci_mmcfg_nvidia_mcp55 }, +}; + +static void __init pci_mmcfg_check_end_bus_number(void) +{ + struct pci_mmcfg_region *cfg, *cfgx; + + /* Fixup overlaps */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; + + /* Don't access the list head ! 
*/ + if (cfg->list.next == &pci_mmcfg_list) + break; + + cfgx = list_entry(cfg->list.next, typeof(*cfg), list); + if (cfg->end_bus >= cfgx->start_bus) + cfg->end_bus = cfgx->start_bus - 1; + } +} + +static int __init pci_mmcfg_check_hostbridge(void) +{ + u32 l; + u32 bus, devfn; + u16 vendor, device; + int i; + const char *name; + + if (!raw_pci_ops) + return 0; + + free_all_mmcfg(); + + for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + bus = pci_mmcfg_probes[i].bus; + devfn = pci_mmcfg_probes[i].devfn; + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + + name = NULL; + if (pci_mmcfg_probes[i].vendor == vendor && + pci_mmcfg_probes[i].device == device) + name = pci_mmcfg_probes[i].probe(); + + if (name) + pr_info(PREFIX "%s with MMCONFIG support\n", name); + } + + /* some end_bus_number is crazy, fix it */ + pci_mmcfg_check_end_bus_number(); + + return !list_empty(&pci_mmcfg_list); +} + +static acpi_status check_mcfg_resource(struct acpi_resource *res, void *data) +{ + struct resource *mcfg_res = data; + struct acpi_resource_address64 address; + acpi_status status; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) { + struct acpi_resource_fixed_memory32 *fixmem32 = + &res->data.fixed_memory32; + if (!fixmem32) + return AE_OK; + if ((mcfg_res->start >= fixmem32->address) && + (mcfg_res->end < (fixmem32->address + + fixmem32->address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + } + if ((res->type != ACPI_RESOURCE_TYPE_ADDRESS32) && + (res->type != ACPI_RESOURCE_TYPE_ADDRESS64)) + return AE_OK; + + status = acpi_resource_to_address64(res, &address); + if (ACPI_FAILURE(status) || + (address.address.address_length <= 0) || + (address.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + if ((mcfg_res->start >= address.address.minimum) && + (mcfg_res->end < (address.address.minimum + address.address.address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + return 
AE_OK; +} + +static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl, + void *context, void **rv) +{ + struct resource *mcfg_res = context; + + acpi_walk_resources(handle, METHOD_NAME__CRS, + check_mcfg_resource, context); + + if (mcfg_res->flags) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static int is_acpi_reserved(u64 start, u64 end, unsigned not_used) +{ + struct resource mcfg_res; + + mcfg_res.start = start; + mcfg_res.end = end - 1; + mcfg_res.flags = 0; + + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); + + if (!mcfg_res.flags) + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, + NULL); + + return mcfg_res.flags; +} + +typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); + +static int __ref is_mmconf_reserved(check_reserved_t is_reserved, + struct pci_mmcfg_region *cfg, + struct device *dev, int with_e820) +{ + u64 addr = cfg->res.start; + u64 size = resource_size(&cfg->res); + u64 old_size = size; + int num_buses; + char *method = with_e820 ? 
"E820" : "ACPI motherboard resources"; + + while (!is_reserved(addr, addr + size, E820_RESERVED)) { + size >>= 1; + if (size < (16UL<<20)) + break; + } + + if (size < (16UL<<20) && size != old_size) + return 0; + + if (dev) + dev_info(dev, "MMCONFIG at %pR reserved in %s\n", + &cfg->res, method); + else + pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", + &cfg->res, method); + + if (old_size != size) { + /* update end_bus */ + cfg->end_bus = cfg->start_bus + ((size>>20) - 1); + num_buses = cfg->end_bus - cfg->start_bus + 1; + cfg->res.end = cfg->res.start + + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", + cfg->segment, cfg->start_bus, cfg->end_bus); + + if (dev) + dev_info(dev, + "MMCONFIG " + "at %pR (base %#lx) (size reduced!)\n", + &cfg->res, (unsigned long) cfg->address); + else + pr_info(PREFIX + "MMCONFIG for %04x [bus%02x-%02x] " + "at %pR (base %#lx) (size reduced!)\n", + cfg->segment, cfg->start_bus, cfg->end_bus, + &cfg->res, (unsigned long) cfg->address); + } + + return 1; +} + +static int __ref pci_mmcfg_check_reserved(struct device *dev, + struct pci_mmcfg_region *cfg, int early) +{ + if (!early && !acpi_disabled) { + if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) + return 1; + + if (dev) + dev_info(dev, FW_INFO + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", + &cfg->res); + else + pr_info(FW_INFO PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", + &cfg->res); + } + + /* + * e820_all_mapped() is marked as __init. + * All entries from ACPI MCFG table have been checked at boot time. + * For MCFG information constructed from hotpluggable host bridge's + * _CBA method, just assume it's reserved. + */ + if (pci_mmcfg_running_state) + return 1; + + /* Don't try to do this check unless configuration + type 1 is available. 
how about type 2 ?*/ + if (raw_pci_ops) + return is_mmconf_reserved(e820_all_mapped, cfg, dev, 1); + + return 0; +} + +static void __init pci_mmcfg_reject_broken(int early) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) { + pr_info(PREFIX "not using MMCONFIG\n"); + free_all_mmcfg(); + return; + } + } +} + +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, + struct acpi_mcfg_allocation *cfg) +{ + int year; + + if (cfg->address < 0xFFFFFFFF) + return 0; + + if (!strncmp(mcfg->header.oem_id, "SGI", 3)) + return 0; + + if (mcfg->header.revision >= 1) { + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2010) + return 0; + } + + pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " + "is above 4GB, ignored\n", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number, cfg->address); + return -EINVAL; +} + +static int __init pci_parse_mcfg(struct acpi_table_header *header) +{ + struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *cfg_table, *cfg; + unsigned long i; + int entries; + + if (!header) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *)header; + + /* how many config structures do we have */ + free_all_mmcfg(); + entries = 0; + i = header->length - sizeof(struct acpi_table_mcfg); + while (i >= sizeof(struct acpi_mcfg_allocation)) { + entries++; + i -= sizeof(struct acpi_mcfg_allocation); + } + if (entries == 0) { + pr_err(PREFIX "MMCONFIG has no entries\n"); + return -ENODEV; + } + + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; + for (i = 0; i < entries; i++) { + cfg = &cfg_table[i]; + if (acpi_mcfg_check_entry(mcfg, cfg)) { + free_all_mmcfg(); + return -ENODEV; + } + + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, + cfg->end_bus_number, cfg->address) == NULL) { + pr_warn(PREFIX "no memory for MCFG entries\n"); + free_all_mmcfg(); + return -ENOMEM; + } + } + + return 0; +} + 
+#ifdef CONFIG_ACPI_APEI +extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, + void *data), void *data); + +static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size, + void *data), void *data) +{ + struct pci_mmcfg_region *cfg; + int rc; + + if (list_empty(&pci_mmcfg_list)) + return 0; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + rc = func(cfg->res.start, resource_size(&cfg->res), data); + if (rc) + return rc; + } + + return 0; +} +#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region) +#else +#define set_apei_filter() +#endif + +static void __init __pci_mmcfg_init(int early) +{ + pci_mmcfg_reject_broken(early); + if (list_empty(&pci_mmcfg_list)) + return; + + if (pcibios_last_bus < 0) { + const struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment) + break; + pcibios_last_bus = cfg->end_bus; + } + } + + if (pci_mmcfg_arch_init()) + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; + else { + free_all_mmcfg(); + pci_mmcfg_arch_init_failed = true; + } +} + +static int __initdata known_bridge; + +void __init pci_mmcfg_early_init(void) +{ + if (pci_probe & PCI_PROBE_MMCONF) { + if (pci_mmcfg_check_hostbridge()) + known_bridge = 1; + else + acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); + __pci_mmcfg_init(1); + + set_apei_filter(); + } +} + +void __init pci_mmcfg_late_init(void) +{ + /* MMCONFIG disabled */ + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + return; + + if (known_bridge) + return; + + /* MMCONFIG hasn't been enabled yet, try again */ + if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) { + acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); + __pci_mmcfg_init(0); + } +} + +static int __init pci_mmcfg_late_insert_resources(void) +{ + struct pci_mmcfg_region *cfg; + + pci_mmcfg_running_state = true; + + /* If we are not using MMCONFIG, don't insert the resources. 
*/ + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + return 1; + + /* + * Attempt to insert the mmcfg resources but not with the busy flag + * marked so it won't cause request errors when __request_region is + * called. + */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (!cfg->res.parent) + insert_resource(&iomem_resource, &cfg->res); + + return 0; +} + +/* + * Perform MMCONFIG resource insertion after PCI initialization to allow for + * misprogrammed MCFG tables that state larger sizes but actually conflict + * with other system resources. + */ +late_initcall(pci_mmcfg_late_insert_resources); + +/* Add MMCFG information for host bridges */ +int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, + phys_addr_t addr) +{ + int rc; + struct resource *tmp = NULL; + struct pci_mmcfg_region *cfg; + + if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed) + return -ENODEV; + + if (start > end) + return -EINVAL; + + mutex_lock(&pci_mmcfg_lock); + cfg = pci_mmconfig_lookup(seg, start); + if (cfg) { + if (cfg->end_bus < end) + dev_info(dev, FW_INFO + "MMCONFIG for " + "domain %04x [bus %02x-%02x] " + "only partially covers this bridge\n", + cfg->segment, cfg->start_bus, cfg->end_bus); + mutex_unlock(&pci_mmcfg_lock); + return -EEXIST; + } + + if (!addr) { + mutex_unlock(&pci_mmcfg_lock); + return -EINVAL; + } + + rc = -EBUSY; + cfg = pci_mmconfig_alloc(seg, start, end, addr); + if (cfg == NULL) { + dev_warn(dev, "fail to add MMCONFIG (out of memory)\n"); + rc = -ENOMEM; + } else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) { + dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n", + &cfg->res); + } else { + /* Insert resource if it's not in boot stage */ + if (pci_mmcfg_running_state) + tmp = insert_resource_conflict(&iomem_resource, + &cfg->res); + + if (tmp) { + dev_warn(dev, + "MMCONFIG %pR conflicts with " + "%s %pR\n", + &cfg->res, tmp->name, tmp); + } else if (pci_mmcfg_arch_map(cfg)) { + dev_warn(dev, "fail to map MMCONFIG %pR.\n", + 
&cfg->res); + } else { + list_add_sorted(cfg); + dev_info(dev, "MMCONFIG at %pR (base %#lx)\n", + &cfg->res, (unsigned long)addr); + cfg = NULL; + rc = 0; + } + } + + if (cfg) { + if (cfg->res.parent) + release_resource(&cfg->res); + kfree(cfg); + } + + mutex_unlock(&pci_mmcfg_lock); + + return rc; +} + +/* Delete MMCFG information for host bridges */ +int pci_mmconfig_delete(u16 seg, u8 start, u8 end) +{ + struct pci_mmcfg_region *cfg; + + mutex_lock(&pci_mmcfg_lock); + list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) + if (cfg->segment == seg && cfg->start_bus == start && + cfg->end_bus == end) { + list_del_rcu(&cfg->list); + synchronize_rcu(); + pci_mmcfg_arch_unmap(cfg); + if (cfg->res.parent) + release_resource(&cfg->res); + mutex_unlock(&pci_mmcfg_lock); + kfree(cfg); + return 0; + } + mutex_unlock(&pci_mmcfg_lock); + + return -ENOENT; +} diff --git a/kernel/arch/x86/pci/mmconfig_32.c b/kernel/arch/x86/pci/mmconfig_32.c new file mode 100644 index 000000000..43984bc16 --- /dev/null +++ b/kernel/arch/x86/pci/mmconfig_32.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2004 Matthew Wilcox + * Copyright (C) 2004 Intel Corp. + * + * This code is released under the GNU General Public License version 2. 
+ */ + +/* + * mmconfig.c - Low-level direct PCI config space access via MMCONFIG + */ + +#include +#include +#include +#include +#include + +/* Assume systems with more busses have correct MCFG */ +#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) + +/* The base address of the last MMCONFIG device accessed */ +static u32 mmcfg_last_accessed_device; +static int mmcfg_last_accessed_cpu; + +/* + * Functions for accessing PCI configuration space with MMCONFIG accesses + */ +static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg) + return cfg->address; + return 0; +} + +/* + * This is always called under pci_config_lock + */ +static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) +{ + u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); + int cpu = smp_processor_id(); + if (dev_base != mmcfg_last_accessed_device || + cpu != mmcfg_last_accessed_cpu) { + mmcfg_last_accessed_device = dev_base; + mmcfg_last_accessed_cpu = cpu; + set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); + } +} + +static int pci_mmcfg_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + u32 base; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { +err: *value = -1; + return -EINVAL; + } + + rcu_read_lock(); + base = get_base_addr(seg, bus, devfn); + if (!base) { + rcu_read_unlock(); + goto err; + } + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(base, bus, devfn); + + switch (len) { + case 1: + *value = mmio_config_readb(mmcfg_virt_addr + reg); + break; + case 2: + *value = mmio_config_readw(mmcfg_virt_addr + reg); + break; + case 4: + *value = mmio_config_readl(mmcfg_virt_addr + reg); + break; + } + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + rcu_read_unlock(); + + return 0; +} + +static int pci_mmcfg_write(unsigned int seg, unsigned int bus, + unsigned 
int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + u32 base; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + rcu_read_lock(); + base = get_base_addr(seg, bus, devfn); + if (!base) { + rcu_read_unlock(); + return -EINVAL; + } + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(base, bus, devfn); + + switch (len) { + case 1: + mmio_config_writeb(mmcfg_virt_addr + reg, value); + break; + case 2: + mmio_config_writew(mmcfg_virt_addr + reg, value); + break; + case 4: + mmio_config_writel(mmcfg_virt_addr + reg, value); + break; + } + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + rcu_read_unlock(); + + return 0; +} + +const struct pci_raw_ops pci_mmcfg = { + .read = pci_mmcfg_read, + .write = pci_mmcfg_write, +}; + +int __init pci_mmcfg_arch_init(void) +{ + printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n"); + raw_pci_ext_ops = &pci_mmcfg; + return 1; +} + +void __init pci_mmcfg_arch_free(void) +{ +} + +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) +{ + return 0; +} + +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) +{ + unsigned long flags; + + /* Invalidate the cached mmcfg map entry. */ + raw_spin_lock_irqsave(&pci_config_lock, flags); + mmcfg_last_accessed_device = 0; + raw_spin_unlock_irqrestore(&pci_config_lock, flags); +} diff --git a/kernel/arch/x86/pci/mmconfig_64.c b/kernel/arch/x86/pci/mmconfig_64.c new file mode 100644 index 000000000..bea52496a --- /dev/null +++ b/kernel/arch/x86/pci/mmconfig_64.c @@ -0,0 +1,153 @@ +/* + * mmconfig.c - Low-level direct PCI config space access via MMCONFIG + * + * This is an 64bit optimized version that always keeps the full mmconfig + * space mapped. This allows lockless config space operation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define PREFIX "PCI: " + +static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} + +static int pci_mmcfg_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { +err: *value = -1; + return -EINVAL; + } + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + goto err; + } + + switch (len) { + case 1: + *value = mmio_config_readb(addr + reg); + break; + case 2: + *value = mmio_config_readw(addr + reg); + break; + case 4: + *value = mmio_config_readl(addr + reg); + break; + } + rcu_read_unlock(); + + return 0; +} + +static int pci_mmcfg_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? 
-AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) + return -EINVAL; + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + return -EINVAL; + } + + switch (len) { + case 1: + mmio_config_writeb(addr + reg, value); + break; + case 2: + mmio_config_writew(addr + reg, value); + break; + case 4: + mmio_config_writel(addr + reg, value); + break; + } + rcu_read_unlock(); + + return 0; +} + +const struct pci_raw_ops pci_mmcfg = { + .read = pci_mmcfg_read, + .write = pci_mmcfg_write, +}; + +static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg) +{ + void __iomem *addr; + u64 start, size; + int num_buses; + + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); + addr = ioremap_nocache(start, size); + if (addr) + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + return addr; +} + +int __init pci_mmcfg_arch_init(void) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (pci_mmcfg_arch_map(cfg)) { + pci_mmcfg_arch_free(); + return 0; + } + + raw_pci_ext_ops = &pci_mmcfg; + + return 1; +} + +void __init pci_mmcfg_arch_free(void) +{ + struct pci_mmcfg_region *cfg; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) + pci_mmcfg_arch_unmap(cfg); +} + +int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg) +{ + cfg->virt = mcfg_ioremap(cfg); + if (!cfg->virt) { + pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res); + return -ENOMEM; + } + + return 0; +} + +void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg) +{ + if (cfg && cfg->virt) { + iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); + cfg->virt = NULL; + } +} diff --git a/kernel/arch/x86/pci/numachip.c b/kernel/arch/x86/pci/numachip.c new file mode 100644 index 000000000..2e565e65c --- /dev/null +++ b/kernel/arch/x86/pci/numachip.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of 
the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-specific PCI code + * + * Copyright (C) 2012 Numascale AS. All rights reserved. + * + * Send feedback to + * + * PCI accessor functions derived from mmconfig_64.c + * + */ + +#include +#include + +static u8 limit __read_mostly; + +static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +{ + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); + + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; +} + +static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { +err: *value = -1; + return -EINVAL; + } + + /* Ensure AMD Northbridges don't decode reads to other devices */ + if (unlikely(bus == 0 && devfn >= limit)) { + *value = -1; + return 0; + } + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + goto err; + } + + switch (len) { + case 1: + *value = mmio_config_readb(addr + reg); + break; + case 2: + *value = mmio_config_readw(addr + reg); + break; + case 4: + *value = mmio_config_readl(addr + reg); + break; + } + rcu_read_unlock(); + + return 0; +} + +static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + char __iomem *addr; + + /* Why do we have this when nobody checks it. How about a BUG()!? 
-AK */ + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) + return -EINVAL; + + /* Ensure AMD Northbridges don't decode writes to other devices */ + if (unlikely(bus == 0 && devfn >= limit)) + return 0; + + rcu_read_lock(); + addr = pci_dev_base(seg, bus, devfn); + if (!addr) { + rcu_read_unlock(); + return -EINVAL; + } + + switch (len) { + case 1: + mmio_config_writeb(addr + reg, value); + break; + case 2: + mmio_config_writew(addr + reg, value); + break; + case 4: + mmio_config_writel(addr + reg, value); + break; + } + rcu_read_unlock(); + + return 0; +} + +static const struct pci_raw_ops pci_mmcfg_numachip = { + .read = pci_mmcfg_read_numachip, + .write = pci_mmcfg_write_numachip, +}; + +int __init pci_numachip_init(void) +{ + int ret = 0; + u32 val; + + /* For remote I/O, restrict bus 0 access to the actual number of AMD + Northbridges, which starts at device number 0x18 */ + ret = raw_pci_read(0, 0, PCI_DEVFN(0x18, 0), 0x60, sizeof(val), &val); + if (ret) + goto out; + + /* HyperTransport fabric size in bits 6:4 */ + limit = PCI_DEVFN(0x18 + ((val >> 4) & 7) + 1, 0); + + /* Use NumaChip PCI accessors for non-extended and extended access */ + raw_pci_ops = raw_pci_ext_ops = &pci_mmcfg_numachip; +out: + return ret; +} diff --git a/kernel/arch/x86/pci/olpc.c b/kernel/arch/x86/pci/olpc.c new file mode 100644 index 000000000..7043a4f0e --- /dev/null +++ b/kernel/arch/x86/pci/olpc.c @@ -0,0 +1,315 @@ +/* + * Low-level PCI config space access for OLPC systems who lack the VSA + * PCI virtualization software. + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * The AMD Geode chipset (i.e. GX2 processor, cs5536 I/O companion device) + * has some I/O functions (display, southbridge, sound, USB HCIs, etc.) + * that more or less behave like PCI devices, but the hardware doesn't + * directly implement the PCI configuration space headers. AMD provides + * "VSA" (Virtual System Architecture) software that emulates PCI config + * space for these devices, by trapping I/O accesses to PCI config registers + * (CF8/CFC) and running some code in System Management Mode interrupt state. + * On the OLPC platform, we don't want to use that VSA code because + * (a) it slows down suspend/resume, and (b) recompiling it requires special + * compilers that are hard to get. So instead of letting the complex VSA + * code simulate the PCI config registers for the on-chip devices, we + * just simulate them the easy way, by inserting the code into the + * pci_write_config and pci_read_config path. Most of the config registers + * are read-only anyway, so the bulk of the simulation is just table lookup. + */ + +#include +#include +#include +#include +#include + +/* + * In the tables below, the first two lines (8 longwords) are the + * size masks that are used when the higher level PCI code determines + * the size of the region by writing ~0 to a base address register + * and reading back the result. + * + * The following lines are the values that are read during normal + * PCI config access cycles, i.e. not after just having written + * ~0 to a base address register.
+ */ + +static const uint32_t lxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x281022, 0x2200005, 0x6000021, 0x80f808, /* AMD Vendor ID */ + 0x0, 0x0, 0x0, 0x0, /* No virtual registers, hence no BAR */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0xfffffffd, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x28100b, 0x2200005, 0x6000021, 0x80f808, /* NSC Vendor ID */ + 0xac1d, 0x0, 0x0, 0x0, /* I/O BAR - base of virtual registers */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t lxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff000008, 0xffffc000, 0xffffc000, 0xffffc000, + 0xffffc000, 0x0, 0x0, 0x0, + + 0x20811022, 0x2200003, 0x3000000, 0x0, /* AMD Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0xfe00c000, 0x0, 0x0, 0x30100b, /* VIP */ + 0x0, 0x0, 0x0, 0x10e, /* INTA, IRQ14 for graphics accel */ + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff800008, 0xffffc000, 0xffffc000, 0xffffc000, + 0x0, 0x0, 0x0, 0x0, + + 0x30100b, 0x2200003, 0x3000000, 0x0, /* NSC Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0x0, 0x0, 0x0, 0x30100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t aes_hdr[] = { /* dev 1 function 2 - devfn = 0xa */ + 0xffffc000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20821022, 0x2a00006, 0x10100000, 0x8, /* AMD Vendor ID */ + 0xfe010000, 0x0, 0x0, 0x0, /* AES registers */ + 0x0, 0x0, 0x0, 0x20821022, + 0x0, 0x0, 0x0, 0x0, + 0x0, 
0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + + +static const uint32_t isa_hdr[] = { /* dev f function 0 - devfn = 78 */ + 0xfffffff9, 0xffffff01, 0xffffffc1, 0xffffffe1, + 0xffffff81, 0xffffffc1, 0x0, 0x0, + + 0x20901022, 0x2a00049, 0x6010003, 0x802000, + 0x18b1, 0x1001, 0x1801, 0x1881, /* SMB-8 GPIO-256 MFGPT-64 IRQ-32 */ + 0x1401, 0x1841, 0x0, 0x20901022, /* PMS-128 ACPI-64 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xaa5b, /* IRQ steering */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ac97_hdr[] = { /* dev f function 3 - devfn = 7b */ + 0xffffff81, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20931022, 0x2a00041, 0x4010001, 0x0, + 0x1481, 0x0, 0x0, 0x0, /* I/O BAR-128 */ + 0x0, 0x0, 0x0, 0x20931022, + 0x0, 0x0, 0x0, 0x205, /* IntB, IRQ5 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ohci_hdr[] = { /* dev f function 4 - devfn = 7c */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20941022, 0x2300006, 0xc031002, 0x0, + 0xfe01a000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20941022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, + 44 is mask 8103 (power control) */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ehci_hdr[] = { /* dev f function 5 - devfn = 7d */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20951022, 0x2300006, 0xc032002, 0x0, + 0xfe01b000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20951022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, 44 is + mask 8103 (power control) */ +#if 0 + 0x1, 0x40080000, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ +#endif + 0x01000001, 0x0, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ + 0x2020, 0x0, 0x0, 0x0, /* (EHCI page 8) 60 SBRN (R/O), + 61 FLADJ (R/W), PORTWAKECAP */ +}; + +static uint32_t ff_loc = ~0; +static uint32_t 
zero_loc; +static int bar_probing; /* Set after a write of ~0 to a BAR */ +static int is_lx; + +#define NB_SLOT 0x1 /* Northbridge - GX chip - Device 1 */ +#define SB_SLOT 0xf /* Southbridge - CS5536 chip - Device F */ + +static int is_simulated(unsigned int bus, unsigned int devfn) +{ + return (!bus && ((PCI_SLOT(devfn) == NB_SLOT) || + (PCI_SLOT(devfn) == SB_SLOT))); +} + +static uint32_t *hdr_addr(const uint32_t *hdr, int reg) +{ + uint32_t addr; + + /* + * This is a little bit tricky. The header maps consist of + * 0x20 bytes of size masks, followed by 0x70 bytes of header data. + * In the normal case, when not probing a BAR's size, we want + * to access the header data, so we add 0x20 to the reg offset, + * thus skipping the size mask area. + * In the BAR probing case, we want to access the size mask for + * the BAR, so we subtract 0x10 (the config header offset for + * BAR0), and don't skip the size mask area. + */ + + addr = (uint32_t)hdr + reg + (bar_probing ? -0x10 : 0x20); + + bar_probing = 0; + return (uint32_t *)addr; +} + +static int pci_olpc_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t *value) +{ + uint32_t *addr; + + WARN_ON(seg); + + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); + + /* + * No device has config registers past 0x70, so we save table space + * by not storing entries for the nonexistent registers + */ + if (reg >= 0x70) + addr = &zero_loc; + else { + switch (devfn) { + case 0x8: + addr = hdr_addr(is_lx ? lxnb_hdr : gxnb_hdr, reg); + break; + case 0x9: + addr = hdr_addr(is_lx ? lxfb_hdr : gxfb_hdr, reg); + break; + case 0xa: + addr = is_lx ? 
hdr_addr(aes_hdr, reg) : &ff_loc; + break; + case 0x78: + addr = hdr_addr(isa_hdr, reg); + break; + case 0x7b: + addr = hdr_addr(ac97_hdr, reg); + break; + case 0x7c: + addr = hdr_addr(ohci_hdr, reg); + break; + case 0x7d: + addr = hdr_addr(ehci_hdr, reg); + break; + default: + addr = &ff_loc; + break; + } + } + switch (len) { + case 1: + *value = *(uint8_t *)addr; + break; + case 2: + *value = *(uint16_t *)addr; + break; + case 4: + *value = *addr; + break; + default: + BUG(); + } + + return 0; +} + +static int pci_olpc_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t value) +{ + WARN_ON(seg); + + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); + + /* XXX we may want to extend this to simulate EHCI power management */ + + /* + * Mostly we just discard writes, but if the write is a size probe + * (i.e. writing ~0 to a BAR), we remember it and arrange to return + * the appropriate size mask on the next read. This is cheating + * to some extent, because it depends on the fact that the next + * access after such a write will always be a read to the same BAR. + */ + + if ((reg >= 0x10) && (reg < 0x2c)) { + /* write is to a BAR */ + if (value == ~0) + bar_probing = 1; + } else { + /* + * No warning on writes to ROM BAR, CMD, LATENCY_TIMER, + * CACHE_LINE_SIZE, or PM registers. 
+ */ + if ((reg != PCI_ROM_ADDRESS) && (reg != PCI_COMMAND_MASTER) && + (reg != PCI_LATENCY_TIMER) && + (reg != PCI_CACHE_LINE_SIZE) && (reg != 0x44)) + printk(KERN_WARNING "OLPC PCI: Config write to devfn" + " %x reg %x value %x\n", devfn, reg, value); + } + + return 0; +} + +static const struct pci_raw_ops pci_olpc_conf = { + .read = pci_olpc_read, + .write = pci_olpc_write, +}; + +int __init pci_olpc_init(void) +{ + printk(KERN_INFO "PCI: Using configuration type OLPC XO-1\n"); + raw_pci_ops = &pci_olpc_conf; + is_lx = is_geode_lx(); + return 0; +} diff --git a/kernel/arch/x86/pci/pcbios.c b/kernel/arch/x86/pci/pcbios.c new file mode 100644 index 000000000..9b83b9051 --- /dev/null +++ b/kernel/arch/x86/pci/pcbios.c @@ -0,0 +1,455 @@ +/* + * BIOS32 and PCI BIOS handling. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* BIOS32 signature: "_32_" */ +#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) + +/* PCI signature: "PCI " */ +#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) + +/* PCI service signature: "$PCI" */ +#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) + +/* PCI BIOS hardware mechanism flags */ +#define PCIBIOS_HW_TYPE1 0x01 +#define PCIBIOS_HW_TYPE2 0x02 +#define PCIBIOS_HW_TYPE1_SPEC 0x10 +#define PCIBIOS_HW_TYPE2_SPEC 0x20 + +int pcibios_enabled; + +/* According to the BIOS specification at: + * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could + * restrict the x zone to some pages and make it ro. But this may be + * broken on some bios, complex to handle with static_protections. + * We could make the 0xe0000-0x100000 range rox, but this can break + * some ISA mapping. + * + * So we let's an rw and x hole when pcibios is used. This shouldn't + * happen for modern system with mmconfig, and if you don't want it + * you could disable pcibios... 
+ */ +static inline void set_bios_x(void) +{ + pcibios_enabled = 1; + set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); + if (__supported_pte_mask & _PAGE_NX) + printk(KERN_INFO "PCI : PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n"); +} + +/* + * This is the standard structure used to identify the entry point + * to the BIOS32 Service Directory, as documented in + * Standard BIOS 32-bit Service Directory Proposal + * Revision 0.4 May 24, 1993 + * Phoenix Technologies Ltd. + * Norwood, MA + * and the PCI BIOS specification. + */ + +union bios32 { + struct { + unsigned long signature; /* _32_ */ + unsigned long entry; /* 32 bit physical address */ + unsigned char revision; /* Revision level, 0 */ + unsigned char length; /* Length in paragraphs should be 01 */ + unsigned char checksum; /* All bytes must add up to zero */ + unsigned char reserved[5]; /* Must be zero */ + } fields; + char chars[16]; +}; + +/* + * Physical address of the service directory. I don't know if we're + * allowed to have more than one of these or not, so just in case + * we'll make pcibios_present() take a memory start parameter and store + * the array there. 
+ */ + +static struct { + unsigned long address; + unsigned short segment; +} bios32_indirect __initdata = { 0, __KERNEL_CS }; + +/* + * Returns the entry point for the given service, NULL on error + */ + +static unsigned long __init bios32_service(unsigned long service) +{ + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; + + local_irq_save(flags); + __asm__("lcall *(%%edi); cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), + "D" (&bios32_indirect)); + local_irq_restore(flags); + + switch (return_code) { + case 0: + return address + entry; + case 0x80: /* Not present */ + printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; + } +} + +static struct { + unsigned long address; + unsigned short segment; +} pci_indirect = { 0, __KERNEL_CS }; + +static int pci_bios_present; + +static int __init check_pcibios(void) +{ + u32 signature, eax, ebx, ecx; + u8 status, major_ver, minor_ver, hw_mech; + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { + pci_indirect.address = pcibios_entry + PAGE_OFFSET; + + local_irq_save(flags); + __asm__( + "lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=d" (signature), + "=a" (eax), + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), + "D" (&pci_indirect) + : "memory"); + local_irq_restore(flags); + + status = (eax >> 8) & 0xff; + hw_mech = eax & 0xff; + major_ver = (ebx >> 8) & 0xff; + minor_ver = ebx & 0xff; + if (pcibios_last_bus < 0) + pcibios_last_bus = ecx & 0xff; + DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", + status, hw_mech, major_ver, minor_ver, pcibios_last_bus); + if (status || 
signature != PCI_SIGNATURE) { + printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", + status, signature); + return 0; + } + printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", + major_ver, minor_ver, pcibios_entry, pcibios_last_bus); +#ifdef CONFIG_PCI_DIRECT + if (!(hw_mech & PCIBIOS_HW_TYPE1)) + pci_probe &= ~PCI_PROBE_CONF1; + if (!(hw_mech & PCIBIOS_HW_TYPE2)) + pci_probe &= ~PCI_PROBE_CONF2; +#endif + return 1; + } + return 0; +} + +static int pci_bios_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = (bus << 8) | devfn; + + WARN_ON(seg); + if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + /* + * Zero-extend the result beyond 8 bits, do not trust the + * BIOS having done it: + */ + *value &= 0xff; + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + /* + * Zero-extend the result beyond 16 bits, do not trust the + * BIOS having done it: + */ + *value &= 0xffff; + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long result = 0; + 
unsigned long flags; + unsigned long bx = (bus << 8) | devfn; + + WARN_ON(seg); + if ((bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + raw_spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_BYTE), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_WORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_DWORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + raw_spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + + +/* + * Function table for BIOS32 access + */ + +static const struct pci_raw_ops pci_bios_access = { + .read = pci_bios_read, + .write = pci_bios_write +}; + +/* + * Try to find PCI BIOS. + */ + +static const struct pci_raw_ops *__init pci_find_bios(void) +{ + union bios32 *check; + unsigned char sum; + int i, length; + + /* + * Follow the standard procedure for locating the BIOS32 Service + * directory by scanning the permissible address range from + * 0xe0000 through 0xfffff for a valid BIOS32 structure. 
+ */ + + for (check = (union bios32 *) __va(0xe0000); + check <= (union bios32 *) __va(0xffff0); + ++check) { + long sig; + if (probe_kernel_address(&check->fields.signature, sig)) + continue; + + if (check->fields.signature != BIOS32_SIGNATURE) + continue; + length = check->fields.length * 16; + if (!length) + continue; + sum = 0; + for (i = 0; i < length ; ++i) + sum += check->chars[i]; + if (sum != 0) + continue; + if (check->fields.revision != 0) { + printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", + check->fields.revision, check); + continue; + } + DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); + if (check->fields.entry >= 0x100000) { + printk("PCI: BIOS32 entry (0x%p) in high memory, " + "cannot use.\n", check); + return NULL; + } else { + unsigned long bios32_entry = check->fields.entry; + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", + bios32_entry); + bios32_indirect.address = bios32_entry + PAGE_OFFSET; + set_bios_x(); + if (check_pcibios()) + return &pci_bios_access; + } + break; /* Hopefully more than one BIOS32 cannot happen... */ + } + + return NULL; +} + +/* + * BIOS Functions for IRQ Routing + */ + +struct irq_routing_options { + u16 size; + struct irq_info *table; + u16 segment; +} __attribute__((packed)); + +struct irq_routing_table * pcibios_get_irq_routing_table(void) +{ + struct irq_routing_options opt; + struct irq_routing_table *rt = NULL; + int ret, map; + unsigned long page; + + if (!pci_bios_present) + return NULL; + page = __get_free_page(GFP_KERNEL); + if (!page) + return NULL; + opt.table = (struct irq_info *) page; + opt.size = PAGE_SIZE; + opt.segment = __KERNEL_DS; + + DBG("PCI: Fetching IRQ routing table... 
"); + __asm__("push %%es\n\t" + "push %%ds\n\t" + "pop %%es\n\t" + "lcall *(%%esi); cld\n\t" + "pop %%es\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret), + "=b" (map), + "=m" (opt) + : "0" (PCIBIOS_GET_ROUTING_OPTIONS), + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect), + "m" (opt) + : "memory"); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) + printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); + else if (opt.size) { + rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); + if (rt) { + memset(rt, 0, sizeof(struct irq_routing_table)); + rt->size = opt.size + sizeof(struct irq_routing_table); + rt->exclusive_irqs = map; + memcpy(rt->slots, (void *) page, opt.size); + printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n"); + } + } + free_page(page); + return rt; +} +EXPORT_SYMBOL(pcibios_get_irq_routing_table); + +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) +{ + int ret; + + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), + "S" (&pci_indirect)); + return !(ret & 0xff00); +} +EXPORT_SYMBOL(pcibios_set_irq_routing); + +void __init pci_pcbios_init(void) +{ + if ((pci_probe & PCI_PROBE_BIOS) + && ((raw_pci_ops = pci_find_bios()))) { + pci_bios_present = 1; + } +} + diff --git a/kernel/arch/x86/pci/sta2x11-fixup.c b/kernel/arch/x86/pci/sta2x11-fixup.c new file mode 100644 index 000000000..5ceda85b8 --- /dev/null +++ b/kernel/arch/x86/pci/sta2x11-fixup.c @@ -0,0 +1,364 @@ +/* + * arch/x86/pci/sta2x11-fixup.c + * glue code for lib/swiotlb.c and DMA translation between STA2x11 + * AMBA memory mapping and the X86 memory mapping + * + * ST Microelectronics ConneXt (STA2X11/STA2X10) + * + * Copyright (c) 2010-2011 Wind River Systems, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include + +#define STA2X11_SWIOTLB_SIZE (4*1024*1024) +extern int swiotlb_late_init_with_default_size(size_t default_size); + +/* + * We build a list of bus numbers that are under the ConneXt. The + * main bridge hosts 4 busses, which are the 4 endpoints, in order. + */ +#define STA2X11_NR_EP 4 /* 0..3 included */ +#define STA2X11_NR_FUNCS 8 /* 0..7 included */ +#define STA2X11_AMBA_SIZE (512 << 20) + +struct sta2x11_ahb_regs { /* saved during suspend */ + u32 base, pexlbase, pexhbase, crw; +}; + +struct sta2x11_mapping { + u32 amba_base; + int is_suspended; + struct sta2x11_ahb_regs regs[STA2X11_NR_FUNCS]; +}; + +struct sta2x11_instance { + struct list_head list; + int bus0; + struct sta2x11_mapping map[STA2X11_NR_EP]; +}; + +static LIST_HEAD(sta2x11_instance_list); + +/* At probe time, record new instances of this bridge (likely one only) */ +static void sta2x11_new_instance(struct pci_dev *pdev) +{ + struct sta2x11_instance *instance; + + instance = kzalloc(sizeof(*instance), GFP_ATOMIC); + if (!instance) + return; + /* This has a subordinate bridge, with 4 more-subordinate ones */ + instance->bus0 = pdev->subordinate->number + 1; + + if (list_empty(&sta2x11_instance_list)) { + int size = STA2X11_SWIOTLB_SIZE; + /* First instance: register your own swiotlb area */ + 
dev_info(&pdev->dev, "Using SWIOTLB (size %i)\n", size); + if (swiotlb_late_init_with_default_size(size)) + dev_emerg(&pdev->dev, "init swiotlb failed\n"); + } + list_add(&instance->list, &sta2x11_instance_list); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, 0xcc17, sta2x11_new_instance); + +/* + * Utility functions used in this file from below + */ +static struct sta2x11_instance *sta2x11_pdev_to_instance(struct pci_dev *pdev) +{ + struct sta2x11_instance *instance; + int ep; + + list_for_each_entry(instance, &sta2x11_instance_list, list) { + ep = pdev->bus->number - instance->bus0; + if (ep >= 0 && ep < STA2X11_NR_EP) + return instance; + } + return NULL; +} + +static int sta2x11_pdev_to_ep(struct pci_dev *pdev) +{ + struct sta2x11_instance *instance; + + instance = sta2x11_pdev_to_instance(pdev); + if (!instance) + return -1; + + return pdev->bus->number - instance->bus0; +} + +static struct sta2x11_mapping *sta2x11_pdev_to_mapping(struct pci_dev *pdev) +{ + struct sta2x11_instance *instance; + int ep; + + instance = sta2x11_pdev_to_instance(pdev); + if (!instance) + return NULL; + ep = sta2x11_pdev_to_ep(pdev); + return instance->map + ep; +} + +/* This is exported, as some devices need to access the MFD registers */ +struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev) +{ + return sta2x11_pdev_to_instance(pdev); +} +EXPORT_SYMBOL(sta2x11_get_instance); + + +/** + * p2a - Translate physical address to STA2x11 AMBA address, + * used for DMA transfers to STA2x11 + * @p: Physical address + * @pdev: PCI device (must be hosted within the connext) + */ +static dma_addr_t p2a(dma_addr_t p, struct pci_dev *pdev) +{ + struct sta2x11_mapping *map; + dma_addr_t a; + + map = sta2x11_pdev_to_mapping(pdev); + a = p + map->amba_base; + return a; +} + +/** + * a2p - Translate STA2x11 AMBA address to physical address + * used for DMA transfers from STA2x11 + * @a: STA2x11 AMBA address + * @pdev: PCI device (must be hosted within the connext) + */ +static 
dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev) +{ + struct sta2x11_mapping *map; + dma_addr_t p; + + map = sta2x11_pdev_to_mapping(pdev); + p = a - map->amba_base; + return p; +} + +/** + * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers + * returns virtual address. This is the only "special" function here. + * @dev: PCI device + * @size: Size of the buffer + * @dma_handle: DMA address + * @flags: memory flags + */ +static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t flags, + struct dma_attrs *attrs) +{ + void *vaddr; + + vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); + *dma_handle = p2a(*dma_handle, to_pci_dev(dev)); + return vaddr; +} + +/* We have our own dma_ops: the same as swiotlb but from alloc (above) */ +static struct dma_map_ops sta2x11_dma_ops = { + .alloc = sta2x11_swiotlb_alloc_coherent, + .free = x86_swiotlb_free_coherent, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .mapping_error = swiotlb_dma_mapping_error, + .dma_supported = NULL, /* FIXME: we should use this instead! 
*/ +}; + +/* At setup time, we use our own ops if the device is a ConneXt one */ +static void sta2x11_setup_pdev(struct pci_dev *pdev) +{ + struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); + + if (!instance) /* either a sta2x11 bridge or another ST device */ + return; + pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); + pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); + pdev->dev.archdata.dma_ops = &sta2x11_dma_ops; + + /* We must enable all devices as master, for audio DMA to work */ + pci_set_master(pdev); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_setup_pdev); + +/* + * The following three functions are exported (used in swiotlb: FIXME) + */ +/** + * dma_capable - Check if device can manage DMA transfers (FIXME: kill it) + * @dev: device for a PCI device + * @addr: DMA address + * @size: DMA size + */ +bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + struct sta2x11_mapping *map; + + if (dev->archdata.dma_ops != &sta2x11_dma_ops) { + if (!dev->dma_mask) + return false; + return addr + size - 1 <= *dev->dma_mask; + } + + map = sta2x11_pdev_to_mapping(to_pci_dev(dev)); + + if (!map || (addr < map->amba_base)) + return false; + if (addr + size >= map->amba_base + STA2X11_AMBA_SIZE) { + return false; + } + + return true; +} + +/** + * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device + * @dev: device for a PCI device + * @paddr: Physical address + */ +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + if (dev->archdata.dma_ops != &sta2x11_dma_ops) + return paddr; + return p2a(paddr, to_pci_dev(dev)); +} + +/** + * dma_to_phys - Return the physical address used for this STA2x11 DMA address + * @dev: device for a PCI device + * @daddr: STA2x11 AMBA DMA address + */ +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + if (dev->archdata.dma_ops != &sta2x11_dma_ops) + return daddr; + return a2p(daddr, to_pci_dev(dev)); +} + + +/* + * At boot we 
must set up the mappings for the pcie-to-amba bridge. + * It involves device access, and the same happens at suspend/resume time + */ + +#define AHB_MAPB 0xCA4 +#define AHB_CRW(i) (AHB_MAPB + 0 + (i) * 0x10) +#define AHB_CRW_SZMASK 0xfffffc00UL +#define AHB_CRW_ENABLE (1 << 0) +#define AHB_CRW_WTYPE_MEM (2 << 1) +#define AHB_CRW_ROE (1UL << 3) /* Relax Order Ena */ +#define AHB_CRW_NSE (1UL << 4) /* No Snoop Enable */ +#define AHB_BASE(i) (AHB_MAPB + 4 + (i) * 0x10) +#define AHB_PEXLBASE(i) (AHB_MAPB + 8 + (i) * 0x10) +#define AHB_PEXHBASE(i) (AHB_MAPB + 12 + (i) * 0x10) + +/* At probe time, enable mapping for each endpoint, using the pdev */ +static void sta2x11_map_ep(struct pci_dev *pdev) +{ + struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); + int i; + + if (!map) + return; + pci_read_config_dword(pdev, AHB_BASE(0), &map->amba_base); + + /* Configure AHB mapping */ + pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0); + pci_write_config_dword(pdev, AHB_PEXHBASE(0), 0); + pci_write_config_dword(pdev, AHB_CRW(0), STA2X11_AMBA_SIZE | + AHB_CRW_WTYPE_MEM | AHB_CRW_ENABLE); + + /* Disable all the other windows */ + for (i = 1; i < STA2X11_NR_FUNCS; i++) + pci_write_config_dword(pdev, AHB_CRW(i), 0); + + dev_info(&pdev->dev, + "sta2x11: Map EP %i: AMBA address %#8x-%#8x\n", + sta2x11_pdev_to_ep(pdev), map->amba_base, + map->amba_base + STA2X11_AMBA_SIZE - 1); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, sta2x11_map_ep); + +#ifdef CONFIG_PM /* Some register values must be saved and restored */ + +static void suspend_mapping(struct pci_dev *pdev) +{ + struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); + int i; + + if (!map) + return; + + if (map->is_suspended) + return; + map->is_suspended = 1; + + /* Save all window configs */ + for (i = 0; i < STA2X11_NR_FUNCS; i++) { + struct sta2x11_ahb_regs *regs = map->regs + i; + + pci_read_config_dword(pdev, AHB_BASE(i), ®s->base); + pci_read_config_dword(pdev, AHB_PEXLBASE(i), 
®s->pexlbase); + pci_read_config_dword(pdev, AHB_PEXHBASE(i), ®s->pexhbase); + pci_read_config_dword(pdev, AHB_CRW(i), ®s->crw); + } +} +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, suspend_mapping); + +static void resume_mapping(struct pci_dev *pdev) +{ + struct sta2x11_mapping *map = sta2x11_pdev_to_mapping(pdev); + int i; + + if (!map) + return; + + + if (!map->is_suspended) + goto out; + map->is_suspended = 0; + + /* Restore all window configs */ + for (i = 0; i < STA2X11_NR_FUNCS; i++) { + struct sta2x11_ahb_regs *regs = map->regs + i; + + pci_write_config_dword(pdev, AHB_BASE(i), regs->base); + pci_write_config_dword(pdev, AHB_PEXLBASE(i), regs->pexlbase); + pci_write_config_dword(pdev, AHB_PEXHBASE(i), regs->pexhbase); + pci_write_config_dword(pdev, AHB_CRW(i), regs->crw); + } +out: + pci_set_master(pdev); /* Like at boot, enable master on all devices */ +} +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_STMICRO, PCI_ANY_ID, resume_mapping); + +#endif /* CONFIG_PM */ diff --git a/kernel/arch/x86/pci/xen.c b/kernel/arch/x86/pci/xen.c new file mode 100644 index 000000000..d22f4b5bb --- /dev/null +++ b/kernel/arch/x86/pci/xen.c @@ -0,0 +1,581 @@ +/* + * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and + * initial domain support. We also handle the DSDT _PRT callbacks for GSI's + * used in HVM and initial domain mode (PV does not parse ACPI, so it has no + * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and + * 0xcf8 PCI configuration read/write. 
+ * + * Author: Ryan Wilson + * Konrad Rzeszutek Wilk + * Stefano Stabellini + */ +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +static int xen_pcifront_enable_irq(struct pci_dev *dev) +{ + int rc; + int share = 1; + int pirq; + u8 gsi; + + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); + return rc; + } + /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/ + pirq = gsi; + + if (gsi < nr_legacy_irqs()) + share = 0; + + rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", + gsi, pirq, rc); + return rc; + } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); + return 0; +} + +#ifdef CONFIG_ACPI +static int xen_register_pirq(u32 gsi, int gsi_override, int triggering, + bool set_pirq) +{ + int rc, pirq = -1, irq = -1; + struct physdev_map_pirq map_irq; + int shareable = 0; + char *name; + + irq = xen_irq_from_gsi(gsi); + if (irq > 0) + return irq; + + if (set_pirq) + pirq = gsi; + + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_GSI; + map_irq.index = gsi; + map_irq.pirq = pirq; + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (rc) { + printk(KERN_WARNING "xen map irq failed %d\n", rc); + return -1; + } + + if (triggering == ACPI_EDGE_SENSITIVE) { + shareable = 0; + name = "ioapic-edge"; + } else { + shareable = 1; + name = "ioapic-level"; + } + + if (gsi_override >= 0) + gsi = gsi_override; + + irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); + if (irq < 0) + goto out; + + printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi); +out: + return irq; +} + +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, 
int polarity) +{ + if (!xen_hvm_domain()) + return -1; + + return xen_register_pirq(gsi, -1 /* no GSI override */, trigger, + false /* no mapping of GSI to PIRQ */); +} + +#ifdef CONFIG_XEN_DOM0 +static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity) +{ + int rc, irq; + struct physdev_setup_gsi setup_gsi; + + if (!xen_pv_domain()) + return -1; + + printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n", + gsi, triggering, polarity); + + irq = xen_register_pirq(gsi, gsi_override, triggering, true); + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (rc == -EEXIST) + printk(KERN_INFO "Already setup the GSI :%d\n", gsi); + else if (rc) { + printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n", + gsi, rc); + } + + return irq; +} + +static int acpi_register_gsi_xen(struct device *dev, u32 gsi, + int trigger, int polarity) +{ + return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity); +} +#endif +#endif + +#if defined(CONFIG_PCI_MSI) +#include +#include + +struct xen_pci_frontend_ops *xen_pci_frontend; +EXPORT_SYMBOL_GPL(xen_pci_frontend); + +static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, ret, i; + struct msi_desc *msidesc; + int *v; + + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); + if (!v) + return -ENOMEM; + + if (type == PCI_CAP_ID_MSIX) + ret = xen_pci_frontend_enable_msix(dev, v, nvec); + else + ret = xen_pci_frontend_enable_msi(dev, v); + if (ret) + goto error; + i = 0; + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], + (type == PCI_CAP_ID_MSI) ? nvec : 1, + (type == PCI_CAP_ID_MSIX) ? 
+ "pcifront-msi-x" : + "pcifront-msi", + DOMID_SELF); + if (irq < 0) { + ret = irq; + goto free; + } + i++; + } + kfree(v); + return 0; + +error: + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); +free: + kfree(v); + return ret; +} + +#define XEN_PIRQ_MSI_DATA (MSI_DATA_TRIGGER_EDGE | \ + MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0)) + +static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, + struct msi_msg *msg) +{ + /* We set vector == 0 to tell the hypervisor we don't care about it, + * but we want a pirq setup instead. + * We use the dest_id field to pass the pirq that we want. */ + msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq); + msg->address_lo = + MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_MODE_PHYSICAL | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID(pirq); + + msg->data = XEN_PIRQ_MSI_DATA; +} + +static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int irq, pirq; + struct msi_desc *msidesc; + struct msi_msg msg; + + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + __pci_read_msi_msg(msidesc, &msg); + pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | + ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); + if (msg.data != XEN_PIRQ_MSI_DATA || + xen_irq_from_pirq(pirq) < 0) { + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; + } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); + } else { + dev_dbg(&dev->dev, + "xen: msi already bound to pirq=%d\n", pirq); + } + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, + (type == PCI_CAP_ID_MSI) ? nvec : 1, + (type == PCI_CAP_ID_MSIX) ? 
+ "msi-x" : "msi", + DOMID_SELF); + if (irq < 0) + goto error; + dev_dbg(&dev->dev, + "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); + } + return 0; + +error: + dev_err(&dev->dev, + "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + return irq; +} + +#ifdef CONFIG_XEN_DOM0 +static bool __read_mostly pci_seg_supported = true; + +static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + int ret = 0; + struct msi_desc *msidesc; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + struct physdev_map_pirq map_irq; + domid_t domid; + + domid = ret = xen_find_device_domain_owner(dev); + /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED, + * hence check ret value for < 0. */ + if (ret < 0) + domid = DOMID_SELF; + + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = domid; + map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number | + (pci_domain_nr(dev->bus) << 16); + map_irq.devfn = dev->devfn; + + if (type == PCI_CAP_ID_MSI && nvec > 1) { + map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI; + map_irq.entry_nr = nvec; + } else if (type == PCI_CAP_ID_MSIX) { + int pos; + unsigned long flags; + u32 table_offset, bir; + + pos = dev->msix_cap; + pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); + flags = pci_resource_flags(dev, bir); + if (!flags || (flags & IORESOURCE_UNSET)) + return -EINVAL; + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + ret = -EINVAL; + if (pci_seg_supported) + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, + &map_irq); + if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) { + /* + * If MAP_PIRQ_TYPE_MULTI_MSI is not available + * there's nothing else we can do in this case. + * Just set ret > 0 so driver can retry with + * single MSI. 
+			 */
+			ret = 1;
+			goto out;
+		}
+		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+			map_irq.type = MAP_PIRQ_TYPE_MSI;
+			map_irq.index = -1;
+			map_irq.pirq = -1;
+			map_irq.bus = dev->bus->number;
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+			if (ret != -EINVAL)	/* any other result turns the flag off */
+				pci_seg_supported = false;	/* also read by xen_initdom_restore_msi_irqs() */
+		}
+		if (ret) {
+			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+				 ret, domid);
+			goto out;
+		}
+		/*
+		 * Hand map_irq.pirq to xen_bind_pirq_msi_to_irq(): the count
+		 * argument is nvec for PCI_CAP_ID_MSI and 1 for any other type,
+		 * and the name string is msi-x only for PCI_CAP_ID_MSIX.
+		 */
+		ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
+					       (type == PCI_CAP_ID_MSI) ? nvec : 1,
+					       (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
+					       domid);
+		if (ret < 0)
+			goto out;
+	}
+	ret = 0;	/* fall-through path (no goto out taken) returns 0 */
+out:
+	return ret;
+}
+/*
+ * xen_initdom_restore_msi_irqs - issue the PHYSDEVOP_restore_msi* hypercall
+ * for @dev.
+ *
+ * While pci_seg_supported is set, PHYSDEVOP_restore_msi_ext is tried first
+ * with the segment (pci_domain_nr()), bus number and devfn of @dev.  A
+ * -ENOSYS result clears pci_seg_supported, which makes the second branch run
+ * in the same call: PHYSDEVOP_restore_msi with bus number and devfn only.
+ *
+ * Any nonzero result other than -ENOSYS raises a WARN.  Nothing is reported
+ * back to the caller.
+ */
+static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
+{
+	int ret = 0;
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device restore_ext;
+
+		restore_ext.seg = pci_domain_nr(dev->bus);
+		restore_ext.bus = dev->bus->number;
+		restore_ext.devfn = dev->devfn;
+		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
+					    &restore_ext);
+		if (ret == -ENOSYS)
+			pci_seg_supported = false;	/* falls into the branch below */
+		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
+	}
+	if (!pci_seg_supported) {
+		struct physdev_restore_msi restore;
+
+		restore.bus = dev->bus->number;
+		restore.devfn = dev->devfn;
+		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
+		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
+	}
+}
+#endif
+/*
+ * xen_teardown_msi_irqs - tear down all MSI/MSI-X interrupts of @dev.
+ *
+ * The first descriptor on dev->msi_list decides which disable call is made:
+ * xen_pci_frontend_disable_msix() when its is_msix attribute is set,
+ * xen_pci_frontend_disable_msi() otherwise.  default_teardown_msi_irqs() is
+ * then called for @dev.
+ *
+ * NOTE(review): dev->msi_list is not checked for emptiness before
+ * list_entry(); presumably callers only invoke this with at least one
+ * descriptor queued - confirm against the MSI core.
+ */
+static void xen_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *msidesc;
+
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	if (msidesc->msi_attrib.is_msix)
+		xen_pci_frontend_disable_msix(dev);
+	else
+		xen_pci_frontend_disable_msi(dev);
+
+	/* Free the IRQ's and the msidesc using the generic code. */
+	default_teardown_msi_irqs(dev);
+}
+/*
+ * xen_teardown_msi_irq - per-IRQ teardown hook; forwards @irq to
+ * xen_destroy_irq().  Installed as x86_msi.teardown_msi_irq by the init
+ * routines further down.
+ */
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+	xen_destroy_irq(irq);
+}
+
+#endif
+/*
+ * pci_xen_init - install the Xen PCI frontend hooks.
+ *
+ * Returns -ENODEV unless xen_pv_domain() is true and xen_initial_domain() is
+ * false.  Otherwise it calls pcibios_set_cache_line_size(), points
+ * pcibios_enable_irq at xen_pcifront_enable_irq, clears pcibios_disable_irq,
+ * sets acpi_noirq (CONFIG_ACPI only) and, with CONFIG_PCI_MSI, installs the
+ * Xen MSI setup/teardown callbacks in x86_msi and sets pci_msi_ignore_mask.
+ * Returns 0 in that case.
+ */
+int __init pci_xen_init(void)
+{
+	if (!xen_pv_domain() || xen_initial_domain())
+		return -ENODEV;
+
+	printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
+
+	pcibios_set_cache_line_size();
+
+	pcibios_enable_irq = xen_pcifront_enable_irq;
+	pcibios_disable_irq = NULL;
+
+#ifdef CONFIG_ACPI
+	/* Keep ACPI out of the picture */
+	acpi_noirq = 1;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+	x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
+	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+	x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
+	pci_msi_ignore_mask = 1;
+#endif
+	return 0;
+}
+/*
+ * xen_msi_init - choose the MSI callbacks for the HVM case.
+ *
+ * Registered as x86_platform.apic_post_init by pci_xen_hvm_init().  When
+ * disable_apic is clear, EAX of CPUID leaf xen_cpuid_base() + 4 is read and
+ * the function returns without touching x86_msi if either
+ *   - XEN_HVM_CPUID_X2APIC_VIRT is set and x2apic_mode is nonzero, or
+ *   - XEN_HVM_CPUID_APIC_ACCESS_VIRT is set and cpu_has_apic is nonzero.
+ * In every other case xen_hvm_setup_msi_irqs and xen_teardown_msi_irq are
+ * installed in x86_msi.
+ */
+#ifdef CONFIG_PCI_MSI
+void __init xen_msi_init(void)
+{
+	if (!disable_apic) {
+		/*
+		 * If hardware supports (x2)APIC virtualization (as indicated
+		 * by hypervisor's leaf 4) then we don't need to use pirqs/
+		 * event channels for MSI handling and instead use regular
+		 * APIC processing
+		 */
+		uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
+
+		if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
+		    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
+			return;	/* leave x86_msi untouched */
+	}
+
+	x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
+	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+}
+#endif
+/*
+ * pci_xen_hvm_init - hook GSI registration and MSI setup for the HVM case.
+ *
+ * Does nothing unless xen_have_vector_callback is set and the
+ * XENFEAT_hvm_pirqs feature is reported.  With CONFIG_ACPI it points
+ * __acpi_register_gsi at acpi_register_gsi_xen_hvm and clears
+ * __acpi_unregister_gsi.  With CONFIG_PCI_MSI it registers xen_msi_init() as
+ * x86_platform.apic_post_init instead of setting the MSI ops directly.
+ * Always returns 0.
+ */
+int __init pci_xen_hvm_init(void)
+{
+	if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
+		return 0;
+
+#ifdef CONFIG_ACPI
+	/*
+	 * We don't want to change the actual ACPI delivery model,
+	 * just how GSIs get registered.
+	 */
+	__acpi_register_gsi = acpi_register_gsi_xen_hvm;
+	__acpi_unregister_gsi = NULL;
+#endif
+
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * We need to wait until after x2apic is initialized
+	 * before we can set MSI IRQ ops.
+	 */
+	x86_platform.apic_post_init = xen_msi_init;
+#endif
+	return 0;
+}
+/*
+ * pci_xen_initial_domain - install the hooks used when CONFIG_XEN_DOM0 is
+ * built in.
+ *
+ * With CONFIG_PCI_MSI the initdom MSI setup/teardown/restore callbacks are
+ * stored in x86_msi and pci_msi_ignore_mask is set.  __acpi_register_gsi is
+ * pointed at acpi_register_gsi_xen and __acpi_unregister_gsi is cleared.
+ *
+ * Every irq below nr_legacy_irqs() for which acpi_get_override_irq() does not
+ * return -1 is passed to xen_register_pirq(); a nonzero trigger selects
+ * ACPI_LEVEL_SENSITIVE, zero selects ACPI_EDGE_SENSITIVE.  When nr_ioapics is
+ * 0, each legacy irq is additionally bound with xen_bind_pirq_gsi_to_irq()
+ * under the name xt-pic.  Always returns 0; the return values of the
+ * registration calls are not examined.
+ */
+#ifdef CONFIG_XEN_DOM0
+int __init pci_xen_initial_domain(void)
+{
+	int irq;
+
+#ifdef CONFIG_PCI_MSI
+	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
+	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
+	pci_msi_ignore_mask = 1;
+#endif
+	__acpi_register_gsi = acpi_register_gsi_xen;
+	__acpi_unregister_gsi = NULL;
+	/* Pre-allocate legacy irqs */
+	for (irq = 0; irq < nr_legacy_irqs(); irq++) {
+		int trigger, polarity;
+
+		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
+			continue;	/* lookup returned -1: skip this irq */
+
+		xen_register_pirq(irq, -1 /* no GSI override */,
+			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
+			true /* Map GSI to PIRQ */);
+	}
+	if (0 == nr_ioapics) {
+		for (irq = 0; irq < nr_legacy_irqs(); irq++)
+			xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
+	}
+	return 0;
+}
+/*
+ * One entry of dev_domain_list: records that @dev is associated with the
+ * domain id @domain.  @list links the entry into dev_domain_list.
+ */
+struct xen_device_domain_owner {
+	domid_t domain;
+	struct pci_dev *dev;
+	struct list_head list;
+};
+/*
+ * dev_domain_list holds every registered xen_device_domain_owner.  All
+ * accessors visible in this file walk or modify it with
+ * dev_domain_list_spinlock held.
+ */
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+/*
+ * find_device - linear search of dev_domain_list for the entry whose dev
+ * pointer equals @dev.  Returns the entry, or NULL when there is none.
+ * Takes no lock itself; the callers below hold dev_domain_list_spinlock.
+ */
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	list_for_each_entry(owner, &dev_domain_list, list) {
+		if (owner->dev == dev)
+			return owner;
+	}
+	return NULL;
+}
+/*
+ * xen_find_device_domain_owner - look up the domain registered for @dev.
+ * Returns the stored domain id, or -ENODEV when @dev has no entry.  The
+ * lookup and the read of owner->domain both happen under
+ * dev_domain_list_spinlock.
+ */
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+	int domain = -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (owner)
+		domain = owner->domain;
+	spin_unlock(&dev_domain_list_spinlock);
+	return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+/*
+ * xen_register_device_domain_owner - record @domain as the owner of @dev.
+ *
+ * The entry is allocated with kzalloc(GFP_KERNEL) before the spinlock is
+ * taken.  Returns:
+ *   0        the new entry was appended to dev_domain_list,
+ *   -EEXIST  @dev already has an entry (the fresh allocation is freed),
+ *   -ENODEV  the allocation failed.
+ *
+ * NOTE(review): -ENOMEM is the conventional code for a failed allocation;
+ * -ENODEV is kept here because callers may test for it - confirm before
+ * changing.
+ */
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+	struct xen_device_domain_owner *owner;
+
+	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+	if (!owner)
+		return -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	if (find_device(dev)) {
+		spin_unlock(&dev_domain_list_spinlock);
+		kfree(owner);	/* duplicate registration: drop the unused entry */
+		return -EEXIST;
+	}
+	owner->domain = domain;
+	owner->dev = dev;
+	list_add_tail(&owner->list, &dev_domain_list);
+	spin_unlock(&dev_domain_list_spinlock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+/*
+ * xen_unregister_device_domain_owner - remove the entry registered for @dev.
+ * Returns 0 after unlinking and freeing the entry, or -ENODEV when @dev has
+ * no entry.  The entry is unlinked under the spinlock and freed after the
+ * lock has been released.
+ */
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (!owner) {
+		spin_unlock(&dev_domain_list_spinlock);
+		return -ENODEV;
+	}
+	list_del(&owner->list);
+	spin_unlock(&dev_domain_list_spinlock);
+	kfree(owner);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+#endif
-- 
cgit 1.2.3-korg