diff options
author | RajithaY <rajithax.yerrumsetty@intel.com> | 2017-04-25 03:31:15 -0700 |
---|---|---|
committer | Rajitha Yerrumchetty <rajithax.yerrumsetty@intel.com> | 2017-05-22 06:48:08 +0000 |
commit | bb756eebdac6fd24e8919e2c43f7d2c8c4091f59 (patch) | |
tree | ca11e03542edf2d8f631efeca5e1626d211107e3 /qemu/hw/i386 | |
parent | a14b48d18a9ed03ec191cf16b162206998a895ce (diff) |
Adding qemu as a submodule of KVMFORNFV
This patch includes the changes to add qemu as a submodule to the
kvmfornfv repo and make use of the latest updated qemu for the
execution of all test cases.
Change-Id: I1280af507a857675c7f81d30c95255635667bdd7
Signed-off-by: RajithaY <rajithax.yerrumsetty@intel.com>
Diffstat (limited to 'qemu/hw/i386')
-rw-r--r-- | qemu/hw/i386/Makefile.objs | 10 | ||||
-rw-r--r-- | qemu/hw/i386/acpi-build.c | 2950 | ||||
-rw-r--r-- | qemu/hw/i386/acpi-build.h | 7 | ||||
-rw-r--r-- | qemu/hw/i386/intel_iommu.c | 2057 | ||||
-rw-r--r-- | qemu/hw/i386/intel_iommu_internal.h | 391 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/Makefile.objs | 1 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/apic.c | 219 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/clock.c | 196 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/i8254.c | 337 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/i8259.c | 163 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/ioapic.c | 179 | ||||
-rw-r--r-- | qemu/hw/i386/kvm/pci-assign.c | 1898 | ||||
-rw-r--r-- | qemu/hw/i386/kvmvapic.c | 866 | ||||
-rw-r--r-- | qemu/hw/i386/multiboot.c | 375 | ||||
-rw-r--r-- | qemu/hw/i386/multiboot.h | 14 | ||||
-rw-r--r-- | qemu/hw/i386/pc.c | 2017 | ||||
-rw-r--r-- | qemu/hw/i386/pc_piix.c | 1062 | ||||
-rw-r--r-- | qemu/hw/i386/pc_q35.c | 318 | ||||
-rw-r--r-- | qemu/hw/i386/pc_sysfw.c | 253 | ||||
-rw-r--r-- | qemu/hw/i386/pci-assign-load-rom.c | 85 | ||||
-rw-r--r-- | qemu/hw/i386/xen/Makefile.objs | 1 | ||||
-rw-r--r-- | qemu/hw/i386/xen/xen_apic.c | 95 | ||||
-rw-r--r-- | qemu/hw/i386/xen/xen_platform.c | 455 | ||||
-rw-r--r-- | qemu/hw/i386/xen/xen_pvdevice.c | 137 |
24 files changed, 0 insertions, 14086 deletions
diff --git a/qemu/hw/i386/Makefile.objs b/qemu/hw/i386/Makefile.objs deleted file mode 100644 index b52d5b875..000000000 --- a/qemu/hw/i386/Makefile.objs +++ /dev/null @@ -1,10 +0,0 @@ -obj-$(CONFIG_KVM) += kvm/ -obj-y += multiboot.o -obj-y += pc.o pc_piix.o pc_q35.o -obj-y += pc_sysfw.o -obj-y += intel_iommu.o -obj-$(CONFIG_XEN) += ../xenpv/ xen/ - -obj-y += kvmvapic.o -obj-y += acpi-build.o -obj-y += pci-assign-load-rom.o diff --git a/qemu/hw/i386/acpi-build.c b/qemu/hw/i386/acpi-build.c deleted file mode 100644 index 64770034f..000000000 --- a/qemu/hw/i386/acpi-build.c +++ /dev/null @@ -1,2950 +0,0 @@ -/* Support for generating ACPI tables and passing them to Guests - * - * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> - * Copyright (C) 2006 Fabrice Bellard - * Copyright (C) 2013 Red Hat Inc - * - * Author: Michael S. Tsirkin <mst@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - - * You should have received a copy of the GNU General Public License along - * with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "acpi-build.h" -#include <glib.h> -#include "qemu-common.h" -#include "qemu/bitmap.h" -#include "qemu/error-report.h" -#include "hw/pci/pci.h" -#include "qom/cpu.h" -#include "hw/i386/pc.h" -#include "target-i386/cpu.h" -#include "hw/timer/hpet.h" -#include "hw/acpi/acpi-defs.h" -#include "hw/acpi/acpi.h" -#include "hw/nvram/fw_cfg.h" -#include "hw/acpi/bios-linker-loader.h" -#include "hw/loader.h" -#include "hw/isa/isa.h" -#include "hw/block/fdc.h" -#include "hw/acpi/memory_hotplug.h" -#include "sysemu/tpm.h" -#include "hw/acpi/tpm.h" -#include "sysemu/tpm_backend.h" -#include "hw/timer/mc146818rtc_regs.h" - -/* Supported chipsets: */ -#include "hw/acpi/piix4.h" -#include "hw/acpi/pcihp.h" -#include "hw/i386/ich9.h" -#include "hw/pci/pci_bus.h" -#include "hw/pci-host/q35.h" -#include "hw/i386/intel_iommu.h" -#include "hw/timer/hpet.h" - -#include "hw/acpi/aml-build.h" - -#include "qapi/qmp/qint.h" -#include "qom/qom-qobject.h" - -/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and - * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows - * a little bit, there should be plenty of free space since the DSDT - * shrunk by ~1.5k between QEMU 2.0 and QEMU 2.1. - */ -#define ACPI_BUILD_LEGACY_CPU_AML_SIZE 97 -#define ACPI_BUILD_ALIGN_SIZE 0x1000 - -#define ACPI_BUILD_TABLE_SIZE 0x20000 - -/* #define DEBUG_ACPI_BUILD */ -#ifdef DEBUG_ACPI_BUILD -#define ACPI_BUILD_DPRINTF(fmt, ...) \ - do {printf("ACPI_BUILD: " fmt, ## __VA_ARGS__); } while (0) -#else -#define ACPI_BUILD_DPRINTF(fmt, ...) 
-#endif - -typedef struct AcpiMcfgInfo { - uint64_t mcfg_base; - uint32_t mcfg_size; -} AcpiMcfgInfo; - -typedef struct AcpiPmInfo { - bool s3_disabled; - bool s4_disabled; - bool pcihp_bridge_en; - uint8_t s4_val; - uint16_t sci_int; - uint8_t acpi_enable_cmd; - uint8_t acpi_disable_cmd; - uint32_t gpe0_blk; - uint32_t gpe0_blk_len; - uint32_t io_base; - uint16_t cpu_hp_io_base; - uint16_t cpu_hp_io_len; - uint16_t mem_hp_io_base; - uint16_t mem_hp_io_len; - uint16_t pcihp_io_base; - uint16_t pcihp_io_len; -} AcpiPmInfo; - -typedef struct AcpiMiscInfo { - bool is_piix4; - bool has_hpet; - TPMVersion tpm_version; - const unsigned char *dsdt_code; - unsigned dsdt_size; - uint16_t pvpanic_port; - uint16_t applesmc_io_base; -} AcpiMiscInfo; - -typedef struct AcpiBuildPciBusHotplugState { - GArray *device_table; - GArray *notify_table; - struct AcpiBuildPciBusHotplugState *parent; - bool pcihp_bridge_en; -} AcpiBuildPciBusHotplugState; - -static void acpi_get_pm_info(AcpiPmInfo *pm) -{ - Object *piix = piix4_pm_find(); - Object *lpc = ich9_lpc_find(); - Object *obj = NULL; - QObject *o; - - pm->cpu_hp_io_base = 0; - pm->pcihp_io_base = 0; - pm->pcihp_io_len = 0; - if (piix) { - obj = piix; - pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; - pm->pcihp_io_base = - object_property_get_int(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); - pm->pcihp_io_len = - object_property_get_int(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); - } - if (lpc) { - obj = lpc; - pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - } - assert(obj); - - pm->cpu_hp_io_len = ACPI_GPE_PROC_LEN; - pm->mem_hp_io_base = ACPI_MEMORY_HOTPLUG_BASE; - pm->mem_hp_io_len = ACPI_MEMORY_HOTPLUG_IO_LEN; - - /* Fill in optional s3/s4 related properties */ - o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); - if (o) { - pm->s3_disabled = qint_get_int(qobject_to_qint(o)); - } else { - pm->s3_disabled = false; - } - qobject_decref(o); - o = object_property_get_qobject(obj, ACPI_PM_PROP_S4_DISABLED, NULL); - if (o) { - 
pm->s4_disabled = qint_get_int(qobject_to_qint(o)); - } else { - pm->s4_disabled = false; - } - qobject_decref(o); - o = object_property_get_qobject(obj, ACPI_PM_PROP_S4_VAL, NULL); - if (o) { - pm->s4_val = qint_get_int(qobject_to_qint(o)); - } else { - pm->s4_val = false; - } - qobject_decref(o); - - /* Fill in mandatory properties */ - pm->sci_int = object_property_get_int(obj, ACPI_PM_PROP_SCI_INT, NULL); - - pm->acpi_enable_cmd = object_property_get_int(obj, - ACPI_PM_PROP_ACPI_ENABLE_CMD, - NULL); - pm->acpi_disable_cmd = object_property_get_int(obj, - ACPI_PM_PROP_ACPI_DISABLE_CMD, - NULL); - pm->io_base = object_property_get_int(obj, ACPI_PM_PROP_PM_IO_BASE, - NULL); - pm->gpe0_blk = object_property_get_int(obj, ACPI_PM_PROP_GPE0_BLK, - NULL); - pm->gpe0_blk_len = object_property_get_int(obj, ACPI_PM_PROP_GPE0_BLK_LEN, - NULL); - pm->pcihp_bridge_en = - object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", - NULL); -} - -static void acpi_get_misc_info(AcpiMiscInfo *info) -{ - Object *piix = piix4_pm_find(); - Object *lpc = ich9_lpc_find(); - assert(!!piix != !!lpc); - - if (piix) { - info->is_piix4 = true; - } - if (lpc) { - info->is_piix4 = false; - } - - info->has_hpet = hpet_find(); - info->tpm_version = tpm_get_version(); - info->pvpanic_port = pvpanic_port(); - info->applesmc_io_base = applesmc_port(); -} - -/* - * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. - * On i386 arch we only have two pci hosts, so we can look only for them. 
- */ -static Object *acpi_get_i386_pci_host(void) -{ - PCIHostState *host; - - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/i440fx", NULL), - TYPE_PCI_HOST_BRIDGE); - if (!host) { - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/q35", NULL), - TYPE_PCI_HOST_BRIDGE); - } - - return OBJECT(host); -} - -static void acpi_get_pci_info(PcPciInfo *info) -{ - Object *pci_host; - - - pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); - - info->w32.begin = object_property_get_int(pci_host, - PCI_HOST_PROP_PCI_HOLE_START, - NULL); - info->w32.end = object_property_get_int(pci_host, - PCI_HOST_PROP_PCI_HOLE_END, - NULL); - info->w64.begin = object_property_get_int(pci_host, - PCI_HOST_PROP_PCI_HOLE64_START, - NULL); - info->w64.end = object_property_get_int(pci_host, - PCI_HOST_PROP_PCI_HOLE64_END, - NULL); -} - -#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ - -static void acpi_align_size(GArray *blob, unsigned align) -{ - /* Align size to multiple of given size. This reduces the chance - * we need to change size in the future (breaking cross version migration). 
- */ - g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); -} - -/* FACS */ -static void -build_facs(GArray *table_data, GArray *linker) -{ - AcpiFacsDescriptorRev1 *facs = acpi_data_push(table_data, sizeof *facs); - memcpy(&facs->signature, "FACS", 4); - facs->length = cpu_to_le32(sizeof(*facs)); -} - -/* Load chipset information in FADT */ -static void fadt_setup(AcpiFadtDescriptorRev1 *fadt, AcpiPmInfo *pm) -{ - fadt->model = 1; - fadt->reserved1 = 0; - fadt->sci_int = cpu_to_le16(pm->sci_int); - fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD); - fadt->acpi_enable = pm->acpi_enable_cmd; - fadt->acpi_disable = pm->acpi_disable_cmd; - /* EVT, CNT, TMR offset matches hw/acpi/core.c */ - fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base); - fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04); - fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08); - fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk); - /* EVT, CNT, TMR length matches hw/acpi/core.c */ - fadt->pm1_evt_len = 4; - fadt->pm1_cnt_len = 2; - fadt->pm_tmr_len = 4; - fadt->gpe0_blk_len = pm->gpe0_blk_len; - fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */ - fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */ - fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) | - (1 << ACPI_FADT_F_PROC_C1) | - (1 << ACPI_FADT_F_SLP_BUTTON) | - (1 << ACPI_FADT_F_RTC_S4)); - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK); - /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs - * For more than 8 CPUs, "Clustered Logical" mode has to be used - */ - if (max_cpus > 8) { - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL); - } - fadt->century = RTC_CENTURY; -} - - -/* FADT */ -static void -build_fadt(GArray *table_data, GArray *linker, AcpiPmInfo *pm, - unsigned facs, unsigned dsdt, - const char *oem_id, const char *oem_table_id) -{ - AcpiFadtDescriptorRev1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); - - fadt->firmware_ctrl = 
cpu_to_le32(facs); - /* FACS address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_FILE, - table_data, &fadt->firmware_ctrl, - sizeof fadt->firmware_ctrl); - - fadt->dsdt = cpu_to_le32(dsdt); - /* DSDT address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_FILE, - table_data, &fadt->dsdt, - sizeof fadt->dsdt); - - fadt_setup(fadt, pm); - - build_header(linker, table_data, - (void *)fadt, "FACP", sizeof(*fadt), 1, oem_id, oem_table_id); -} - -static void -build_madt(GArray *table_data, GArray *linker, PCMachineState *pcms) -{ - MachineClass *mc = MACHINE_GET_CLASS(pcms); - CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(pcms)); - int madt_start = table_data->len; - - AcpiMultipleApicTable *madt; - AcpiMadtIoApic *io_apic; - AcpiMadtIntsrcovr *intsrcovr; - AcpiMadtLocalNmi *local_nmi; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); - madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); - madt->flags = cpu_to_le32(1); - - for (i = 0; i < apic_ids->len; i++) { - AcpiMadtProcessorApic *apic = acpi_data_push(table_data, sizeof *apic); - int apic_id = apic_ids->cpus[i].arch_id; - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = apic_id; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[i].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - /* ACPI spec says that LAPIC entry for non present - * CPU may be omitted from MADT or it must be marked - * as disabled. However omitting non present CPU from - * MADT breaks hotplug on linux. So possible CPUs - * should be put in MADT but kept disabled. 
- */ - apic->flags = cpu_to_le32(0); - } - } - g_free(apic_ids); - - io_apic = acpi_data_push(table_data, sizeof *io_apic); - io_apic->type = ACPI_APIC_IO; - io_apic->length = sizeof(*io_apic); -#define ACPI_BUILD_IOAPIC_ID 0x0 - io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; - io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); - io_apic->interrupt = cpu_to_le32(0); - - if (pcms->apic_xrupt_override) { - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = 0; - intsrcovr->gsi = cpu_to_le32(2); - intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ - } - for (i = 1; i < 16; i++) { -#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) - if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { - /* No need for a INT source override structure. */ - continue; - } - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = i; - intsrcovr->gsi = cpu_to_le32(i); - intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ - } - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ - - build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 1, NULL, NULL); -} - -/* Assign BSEL property to all buses. In the future, this can be changed - * to only assign to buses that support hotplug. 
- */ -static void *acpi_set_bsel(PCIBus *bus, void *opaque) -{ - unsigned *bsel_alloc = opaque; - unsigned *bus_bsel; - - if (qbus_is_hotpluggable(BUS(bus))) { - bus_bsel = g_malloc(sizeof *bus_bsel); - - *bus_bsel = (*bsel_alloc)++; - object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, - bus_bsel, NULL); - } - - return bsel_alloc; -} - -static void acpi_set_pci_info(void) -{ - PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ - unsigned bsel_alloc = 0; - - if (bus) { - /* Scan all PCI buses. Set property to enable acpi based hotplug. */ - pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); - } -} - -static void build_append_pcihp_notify_entry(Aml *method, int slot) -{ - Aml *if_ctx; - int32_t devfn = PCI_DEVFN(slot, 0); - - if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL)); - aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1))); - aml_append(method, if_ctx); -} - -static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, - bool pcihp_bridge_en) -{ - Aml *dev, *notify_method, *method; - QObject *bsel; - PCIBus *sec; - int i; - - bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); - if (bsel) { - int64_t bsel_val = qint_get_int(qobject_to_qint(bsel)); - - aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val))); - notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); - } - - for (i = 0; i < ARRAY_SIZE(bus->devices); i += PCI_FUNC_MAX) { - DeviceClass *dc; - PCIDeviceClass *pc; - PCIDevice *pdev = bus->devices[i]; - int slot = PCI_SLOT(i); - bool hotplug_enabled_dev; - bool bridge_in_acpi; - - if (!pdev) { - if (bsel) { /* add hotplug slots for non present devices */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), 
aml_name("_SUN")) - ); - aml_append(dev, method); - aml_append(parent_scope, dev); - - build_append_pcihp_notify_entry(notify_method, slot); - } - continue; - } - - pc = PCI_DEVICE_GET_CLASS(pdev); - dc = DEVICE_GET_CLASS(pdev); - - /* When hotplug for bridges is enabled, bridges are - * described in ACPI separately (see build_pci_bus_end). - * In this case they aren't themselves hot-pluggable. - * Hotplugged bridges *are* hot-pluggable. - */ - bridge_in_acpi = pc->is_bridge && pcihp_bridge_en && - !DEVICE(pdev)->hotplugged; - - hotplug_enabled_dev = bsel && dc->hotpluggable && !bridge_in_acpi; - - if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { - continue; - } - - /* start to compose PCI slot descriptor */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); - - if (pc->class_id == PCI_CLASS_DISPLAY_VGA) { - /* add VGA specific AML methods */ - int s3d; - - if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) { - s3d = 3; - } else { - s3d = 0; - } - - method = aml_method("_S1D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S2D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S3D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(s3d))); - aml_append(dev, method); - } else if (hotplug_enabled_dev) { - /* add _SUN/_EJ0 to make slot hotpluggable */ - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) - ); - aml_append(dev, method); - - if (bsel) { - build_append_pcihp_notify_entry(notify_method, slot); - } - } else if (bridge_in_acpi) { - /* - * device is coldplugged bridge, - * add child device descriptions into its scope - */ - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); - - 
build_append_pci_bus_devices(dev, sec_bus, pcihp_bridge_en); - } - /* slot descriptor has been composed, add it into parent context */ - aml_append(parent_scope, dev); - } - - if (bsel) { - aml_append(parent_scope, notify_method); - } - - /* Append PCNT method to notify about events on local and child buses. - * Add unconditionally for root since DSDT expects it. - */ - method = aml_method("PCNT", 0, AML_NOTSERIALIZED); - - /* If bus supports hotplug select it and notify about local events */ - if (bsel) { - int64_t bsel_val = qint_get_int(qobject_to_qint(bsel)); - aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM"))); - aml_append(method, - aml_call2("DVNT", aml_name("PCIU"), aml_int(1) /* Device Check */) - ); - aml_append(method, - aml_call2("DVNT", aml_name("PCID"), aml_int(3)/* Eject Request */) - ); - } - - /* Notify about child bus events in any case */ - if (pcihp_bridge_en) { - QLIST_FOREACH(sec, &bus->child, sibling) { - int32_t devfn = sec->parent_dev->devfn; - - aml_append(method, aml_name("^S%.02X.PCNT", devfn)); - } - } - aml_append(parent_scope, method); - qobject_decref(bsel); -} - -/** - * build_prt_entry: - * @link_name: link name for PCI route entry - * - * build AML package containing a PCI route entry for @link_name - */ -static Aml *build_prt_entry(const char *link_name) -{ - Aml *a_zero = aml_int(0); - Aml *pkg = aml_package(4); - aml_append(pkg, a_zero); - aml_append(pkg, a_zero); - aml_append(pkg, aml_name("%s", link_name)); - aml_append(pkg, a_zero); - return pkg; -} - -/* - * initialize_route - Initialize the interrupt routing rule - * through a specific LINK: - * if (lnk_idx == idx) - * route using link 'link_name' - */ -static Aml *initialize_route(Aml *route, const char *link_name, - Aml *lnk_idx, int idx) -{ - Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx))); - Aml *pkg = build_prt_entry(link_name); - - aml_append(if_ctx, aml_store(pkg, route)); - - return if_ctx; -} - -/* - * build_prt - Define interrupt rounting 
rules - * - * Returns an array of 128 routes, one for each device, - * based on device location. - * The main goal is to equaly distribute the interrupts - * over the 4 existing ACPI links (works only for i440fx). - * The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]". - * - */ -static Aml *build_prt(bool is_pci0_prt) -{ - Aml *method, *while_ctx, *pin, *res; - - method = aml_method("_PRT", 0, AML_NOTSERIALIZED); - res = aml_local(0); - pin = aml_local(1); - aml_append(method, aml_store(aml_package(128), res)); - aml_append(method, aml_store(aml_int(0), pin)); - - /* while (pin < 128) */ - while_ctx = aml_while(aml_lless(pin, aml_int(128))); - { - Aml *slot = aml_local(2); - Aml *lnk_idx = aml_local(3); - Aml *route = aml_local(4); - - /* slot = pin >> 2 */ - aml_append(while_ctx, - aml_store(aml_shiftright(pin, aml_int(2), NULL), slot)); - /* lnk_idx = (slot + pin) & 3 */ - aml_append(while_ctx, - aml_store(aml_and(aml_add(pin, slot, NULL), aml_int(3), NULL), - lnk_idx)); - - /* route[2] = "LNK[D|A|B|C]", selection based on pin % 3 */ - aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0)); - if (is_pci0_prt) { - Aml *if_device_1, *if_pin_4, *else_pin_4; - - /* device 1 is the power-management device, needs SCI */ - if_device_1 = aml_if(aml_equal(lnk_idx, aml_int(1))); - { - if_pin_4 = aml_if(aml_equal(pin, aml_int(4))); - { - aml_append(if_pin_4, - aml_store(build_prt_entry("LNKS"), route)); - } - aml_append(if_device_1, if_pin_4); - else_pin_4 = aml_else(); - { - aml_append(else_pin_4, - aml_store(build_prt_entry("LNKA"), route)); - } - aml_append(if_device_1, else_pin_4); - } - aml_append(while_ctx, if_device_1); - } else { - aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1)); - } - aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2)); - aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3)); - - /* route[0] = 0x[slot]FFFF */ - aml_append(while_ctx, - aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), 
aml_int(0xFFFF), - NULL), - aml_index(route, aml_int(0)))); - /* route[1] = pin & 3 */ - aml_append(while_ctx, - aml_store(aml_and(pin, aml_int(3), NULL), - aml_index(route, aml_int(1)))); - /* res[pin] = route */ - aml_append(while_ctx, aml_store(route, aml_index(res, pin))); - /* pin++ */ - aml_append(while_ctx, aml_increment(pin)); - } - aml_append(method, while_ctx); - /* return res*/ - aml_append(method, aml_return(res)); - - return method; -} - -typedef struct CrsRangeEntry { - uint64_t base; - uint64_t limit; -} CrsRangeEntry; - -static void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit) -{ - CrsRangeEntry *entry; - - entry = g_malloc(sizeof(*entry)); - entry->base = base; - entry->limit = limit; - - g_ptr_array_add(ranges, entry); -} - -static void crs_range_free(gpointer data) -{ - CrsRangeEntry *entry = (CrsRangeEntry *)data; - g_free(entry); -} - -static gint crs_range_compare(gconstpointer a, gconstpointer b) -{ - CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; - CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; - - return (int64_t)entry_a->base - (int64_t)entry_b->base; -} - -/* - * crs_replace_with_free_ranges - given the 'used' ranges within [start - end] - * interval, computes the 'free' ranges from the same interval. - * Example: If the input array is { [a1 - a2],[b1 - b2] }, the function - * will return { [base - a1], [a2 - b1], [b2 - limit] }. 
- */ -static void crs_replace_with_free_ranges(GPtrArray *ranges, - uint64_t start, uint64_t end) -{ - GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free); - uint64_t free_base = start; - int i; - - g_ptr_array_sort(ranges, crs_range_compare); - for (i = 0; i < ranges->len; i++) { - CrsRangeEntry *used = g_ptr_array_index(ranges, i); - - if (free_base < used->base) { - crs_range_insert(free_ranges, free_base, used->base - 1); - } - - free_base = used->limit + 1; - } - - if (free_base < end) { - crs_range_insert(free_ranges, free_base, end); - } - - g_ptr_array_set_size(ranges, 0); - for (i = 0; i < free_ranges->len; i++) { - g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i)); - } - - g_ptr_array_free(free_ranges, false); -} - -/* - * crs_range_merge - merges adjacent ranges in the given array. - * Array elements are deleted and replaced with the merged ranges. - */ -static void crs_range_merge(GPtrArray *range) -{ - GPtrArray *tmp = g_ptr_array_new_with_free_func(crs_range_free); - CrsRangeEntry *entry; - uint64_t range_base, range_limit; - int i; - - if (!range->len) { - return; - } - - g_ptr_array_sort(range, crs_range_compare); - - entry = g_ptr_array_index(range, 0); - range_base = entry->base; - range_limit = entry->limit; - for (i = 1; i < range->len; i++) { - entry = g_ptr_array_index(range, i); - if (entry->base - 1 == range_limit) { - range_limit = entry->limit; - } else { - crs_range_insert(tmp, range_base, range_limit); - range_base = entry->base; - range_limit = entry->limit; - } - } - crs_range_insert(tmp, range_base, range_limit); - - g_ptr_array_set_size(range, 0); - for (i = 0; i < tmp->len; i++) { - entry = g_ptr_array_index(tmp, i); - crs_range_insert(range, entry->base, entry->limit); - } - g_ptr_array_free(tmp, true); -} - -static Aml *build_crs(PCIHostState *host, - GPtrArray *io_ranges, GPtrArray *mem_ranges) -{ - Aml *crs = aml_resource_template(); - GPtrArray *host_io_ranges = 
g_ptr_array_new_with_free_func(crs_range_free); - GPtrArray *host_mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); - CrsRangeEntry *entry; - uint8_t max_bus = pci_bus_num(host->bus); - uint8_t type; - int devfn; - int i; - - for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) { - uint64_t range_base, range_limit; - PCIDevice *dev = host->bus->devices[devfn]; - - if (!dev) { - continue; - } - - for (i = 0; i < PCI_NUM_REGIONS; i++) { - PCIIORegion *r = &dev->io_regions[i]; - - range_base = r->addr; - range_limit = r->addr + r->size - 1; - - /* - * Work-around for old bioses - * that do not support multiple root buses - */ - if (!range_base || range_base > range_limit) { - continue; - } - - if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { - crs_range_insert(host_io_ranges, range_base, range_limit); - } else { /* "memory" */ - crs_range_insert(host_mem_ranges, range_base, range_limit); - } - } - - type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; - if (type == PCI_HEADER_TYPE_BRIDGE) { - uint8_t subordinate = dev->config[PCI_SUBORDINATE_BUS]; - if (subordinate > max_bus) { - max_bus = subordinate; - } - - range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); - range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); - - /* - * Work-around for old bioses - * that do not support multiple root buses - */ - if (range_base && range_base <= range_limit) { - crs_range_insert(host_io_ranges, range_base, range_limit); - } - - range_base = - pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); - range_limit = - pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); - - /* - * Work-around for old bioses - * that do not support multiple root buses - */ - if (range_base && range_base <= range_limit) { - crs_range_insert(host_mem_ranges, range_base, range_limit); - } - - range_base = - pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); - range_limit = - pci_bridge_get_limit(dev, 
PCI_BASE_ADDRESS_MEM_PREFETCH); - - /* - * Work-around for old bioses - * that do not support multiple root buses - */ - if (range_base && range_base <= range_limit) { - crs_range_insert(host_mem_ranges, range_base, range_limit); - } - } - } - - crs_range_merge(host_io_ranges); - for (i = 0; i < host_io_ranges->len; i++) { - entry = g_ptr_array_index(host_io_ranges, i); - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0, entry->base, entry->limit, 0, - entry->limit - entry->base + 1)); - crs_range_insert(io_ranges, entry->base, entry->limit); - } - g_ptr_array_free(host_io_ranges, true); - - crs_range_merge(host_mem_ranges); - for (i = 0; i < host_mem_ranges->len; i++) { - entry = g_ptr_array_index(host_mem_ranges, i); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, - AML_MAX_FIXED, AML_NON_CACHEABLE, - AML_READ_WRITE, - 0, entry->base, entry->limit, 0, - entry->limit - entry->base + 1)); - crs_range_insert(mem_ranges, entry->base, entry->limit); - } - g_ptr_array_free(host_mem_ranges, true); - - aml_append(crs, - aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, - 0, - pci_bus_num(host->bus), - max_bus, - 0, - max_bus - pci_bus_num(host->bus) + 1)); - - return crs; -} - -static void build_processor_devices(Aml *sb_scope, MachineState *machine, - AcpiPmInfo *pm) -{ - int i, apic_idx; - Aml *dev; - Aml *crs; - Aml *pkg; - Aml *field; - Aml *ifctx; - Aml *method; - MachineClass *mc = MACHINE_GET_CLASS(machine); - CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); - PCMachineState *pcms = PC_MACHINE(machine); - - /* The current AML generator can cover the APIC ID range [0..255], - * inclusive, for VCPU hotplug. */ - QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); - g_assert(pcms->apic_id_limit <= ACPI_CPU_HOTPLUG_ID_LIMIT); - - /* create PCI0.PRES device and its _CRS to reserve the CPU hotplug I/O port range */ - dev = aml_device("PCI0." 
stringify(CPU_HOTPLUG_RESOURCE_DEVICE)); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A06"))); - aml_append(dev, - aml_name_decl("_UID", aml_string("CPU Hotplug resources")) - ); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, pm->cpu_hp_io_base, pm->cpu_hp_io_base, 1, - pm->cpu_hp_io_len) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(sb_scope, dev); - /* declare the CPU hotplug I/O region (PRST, a SystemIO operation region) and the PRS field to access it */ - aml_append(sb_scope, aml_operation_region( - "PRST", AML_SYSTEM_IO, aml_int(pm->cpu_hp_io_base), pm->cpu_hp_io_len)); - field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRS", 256)); - aml_append(sb_scope, field); - - /* build a Processor object (CPxx, named after its APIC ID) with _MAT, _STA and _EJ0 methods for each entry of apic_ids */ - for (i = 0; i < apic_ids->len; i++) { - int apic_id = apic_ids->cpus[i].arch_id; - - assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT); - - dev = aml_processor(apic_id, 0, 0, "CP%.02X", apic_id); - - method = aml_method("_MAT", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_return(aml_call1(CPU_MAT_METHOD, aml_int(apic_id)))); - aml_append(dev, method); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_return(aml_call1(CPU_STATUS_METHOD, aml_int(apic_id)))); - aml_append(dev, method); - - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, - aml_return(aml_call2(CPU_EJECT_METHOD, aml_int(apic_id), - aml_arg(0))) - ); - aml_append(dev, method); - - aml_append(sb_scope, dev); - } - - /* build this code: - * Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...} - */ - /* Arg0 = Processor ID = APIC ID */ - method = aml_method(AML_NOTIFY_METHOD, 2, AML_NOTSERIALIZED); - for (i = 0; i < apic_ids->len; 
i++) { - int apic_id = apic_ids->cpus[i].arch_id; - - ifctx = aml_if(aml_equal(aml_arg(0), 
aml_int(apic_id))); - aml_append(ifctx, - aml_notify(aml_name("CP%.02X", apic_id), aml_arg(1)) - ); - aml_append(method, ifctx); - } - aml_append(sb_scope, method); - - /* build "Name(CPON, Package() { One, One, ..., Zero, Zero, ... })" - * - * Entry N is One when apic_ids holds a CPU object for APIC ID N and - * Zero otherwise; gaps in the APIC ID space are filled with Zero. - * - * Note: The ability to create variable-sized packages was first - * introduced in ACPI 2.0. ACPI 1.0 only allowed fixed-size packages - * with up to 255 elements. Windows guests up to win2k8 fail when - * VarPackageOp is used. - */ - pkg = pcms->apic_id_limit <= 255 ? aml_package(pcms->apic_id_limit) : - aml_varpackage(pcms->apic_id_limit); - - for (i = 0, apic_idx = 0; i < apic_ids->len; i++) { - int apic_id = apic_ids->cpus[i].arch_id; - - for (; apic_idx < apic_id; apic_idx++) { - aml_append(pkg, aml_int(0)); - } - aml_append(pkg, aml_int(apic_ids->cpus[i].cpu ? 1 : 0)); - apic_idx = apic_id + 1; - } - aml_append(sb_scope, aml_name_decl(CPU_ON_BITMAP, pkg)); - g_free(apic_ids); -} - -static void build_memory_devices(Aml *sb_scope, int nr_mem, - uint16_t io_base, uint16_t io_len) -{ - int i; - Aml *scope; - Aml *crs; - Aml *field; - Aml *dev; - Aml *method; - Aml *ifctx; - - /* build memory devices */ - assert(nr_mem <= ACPI_MAX_RAM_SLOTS); - scope = aml_scope("\\_SB.PCI0." 
MEMORY_HOTPLUG_DEVICE); - aml_append(scope, - aml_name_decl(MEMORY_SLOTS_NUMBER, aml_int(nr_mem)) - ); - - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, io_base, io_base, 0, io_len) - ); - aml_append(scope, aml_name_decl("_CRS", crs)); - - aml_append(scope, aml_operation_region( - MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO, - aml_int(io_base), io_len) - ); - - field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC, - AML_NOLOCK, AML_PRESERVE); - aml_append(field, /* read only */ - aml_named_field(MEMORY_SLOT_ADDR_LOW, 32)); - aml_append(field, /* read only */ - aml_named_field(MEMORY_SLOT_ADDR_HIGH, 32)); - aml_append(field, /* read only */ - aml_named_field(MEMORY_SLOT_SIZE_LOW, 32)); - aml_append(field, /* read only */ - aml_named_field(MEMORY_SLOT_SIZE_HIGH, 32)); - aml_append(field, /* read only */ - aml_named_field(MEMORY_SLOT_PROXIMITY, 32)); - aml_append(scope, field); - - field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_BYTE_ACC, - AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_reserved_field(160 /* bits, Offset(20) */)); - aml_append(field, /* 1 if enabled, read only */ - aml_named_field(MEMORY_SLOT_ENABLED, 1)); - aml_append(field, - /*(read) 1 if has a insert event. (write) 1 to clear event */ - aml_named_field(MEMORY_SLOT_INSERT_EVENT, 1)); - aml_append(field, - /* (read) 1 if has a remove event. 
(write) 1 to clear event */ - aml_named_field(MEMORY_SLOT_REMOVE_EVENT, 1)); - aml_append(field, - /* initiates device eject, write only */ - aml_named_field(MEMORY_SLOT_EJECT, 1)); - aml_append(scope, field); - - field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC, - AML_NOLOCK, AML_PRESERVE); - aml_append(field, /* DIMM selector, write only */ - aml_named_field(MEMORY_SLOT_SLECTOR, 32)); - aml_append(field, /* _OST event code, write only */ - aml_named_field(MEMORY_SLOT_OST_EVENT, 32)); - aml_append(field, /* _OST status code, write only */ - aml_named_field(MEMORY_SLOT_OST_STATUS, 32)); - aml_append(scope, field); - aml_append(sb_scope, scope); - - for (i = 0; i < nr_mem; i++) { - #define BASEPATH "\\_SB.PCI0." MEMORY_HOTPLUG_DEVICE "." - const char *s; - - dev = aml_device("MP%02X", i); - aml_append(dev, aml_name_decl("_UID", aml_string("0x%02X", i))); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C80"))); - - method = aml_method("_CRS", 0, AML_NOTSERIALIZED); - s = BASEPATH MEMORY_SLOT_CRS_METHOD; - aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); - aml_append(dev, method); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - s = BASEPATH MEMORY_SLOT_STATUS_METHOD; - aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); - aml_append(dev, method); - - method = aml_method("_PXM", 0, AML_NOTSERIALIZED); - s = BASEPATH MEMORY_SLOT_PROXIMITY_METHOD; - aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); - aml_append(dev, method); - - method = aml_method("_OST", 3, AML_NOTSERIALIZED); - s = BASEPATH MEMORY_SLOT_OST_METHOD; - - aml_append(method, aml_return(aml_call4( - s, aml_name("_UID"), aml_arg(0), aml_arg(1), aml_arg(2) - ))); - aml_append(dev, method); - - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - s = BASEPATH MEMORY_SLOT_EJECT_METHOD; - aml_append(method, aml_return(aml_call2( - s, aml_name("_UID"), aml_arg(0)))); - aml_append(dev, method); - - aml_append(sb_scope, dev); - } - - /* build 
Method(MEMORY_SLOT_NOTIFY_METHOD, 2) { - * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... } - */ - method = aml_method(MEMORY_SLOT_NOTIFY_METHOD, 2, AML_NOTSERIALIZED); - for (i = 0; i < nr_mem; i++) { - ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i))); - aml_append(ifctx, - aml_notify(aml_name("MP%.02X", i), aml_arg(1)) - ); - aml_append(method, ifctx); - } - aml_append(sb_scope, method); -} - -static void build_hpet_aml(Aml *table) -{ - Aml *crs; - Aml *field; - Aml *method; - Aml *if_ctx; - Aml *scope = aml_scope("_SB"); - Aml *dev = aml_device("HPET"); - Aml *zero = aml_int(0); - Aml *id = aml_local(0); - Aml *period = aml_local(1); - - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0103"))); - aml_append(dev, aml_name_decl("_UID", zero)); - - aml_append(dev, - aml_operation_region("HPTM", AML_SYSTEM_MEMORY, aml_int(HPET_BASE), - HPET_LEN)); - field = aml_field("HPTM", AML_DWORD_ACC, AML_LOCK, AML_PRESERVE); - aml_append(field, aml_named_field("VEND", 32)); - aml_append(field, aml_named_field("PRD", 32)); - aml_append(dev, field); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("VEND"), id)); - aml_append(method, aml_store(aml_name("PRD"), period)); - aml_append(method, aml_shiftright(id, aml_int(16), id)); - if_ctx = aml_if(aml_lor(aml_equal(id, zero), - aml_equal(id, aml_int(0xffff)))); - { - aml_append(if_ctx, aml_return(zero)); - } - aml_append(method, if_ctx); - - if_ctx = aml_if(aml_lor(aml_equal(period, zero), - aml_lgreater(period, aml_int(100000000)))); - { - aml_append(if_ctx, aml_return(zero)); - } - aml_append(method, if_ctx); - - aml_append(method, aml_return(aml_int(0x0F))); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_memory32_fixed(HPET_BASE, HPET_LEN, AML_READ_ONLY)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - aml_append(scope, dev); - aml_append(table, scope); -} - -static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) -{ - Aml *dev, 
*fdi; - uint8_t maxc, maxh, maxs; - - isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); - - dev = aml_device("FLP%c", 'A' + idx); - - aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); - - fdi = aml_package(16); - aml_append(fdi, aml_int(idx)); /* Drive Number */ - aml_append(fdi, - aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ - /* - * the values below are the limits of the drive, and are thus independent - * of the inserted media - */ - aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ - aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ - aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ - /* - * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of - * the drive type, so shall we - */ - aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ - aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ - aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ - aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ - aml_append(fdi, aml_int(0x12)); /* disk_eot */ - aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ - aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ - aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ - aml_append(fdi, aml_int(0xF6)); /* disk_fill */ - aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ - aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ - - aml_append(dev, aml_name_decl("_FDI", fdi)); - return dev; -} - -static Aml *build_fdc_device_aml(ISADevice *fdc) -{ - int i; - Aml *dev; - Aml *crs; - -#define ACPI_FDE_MAX_FD 4 - uint32_t fde_buf[5] = { - 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ - cpu_to_le32(2) /* tape presence (2 == never present) */ - }; - - dev = aml_device("FDC0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); - aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); - aml_append(crs, aml_irq_no_flags(6)); - 
aml_append(crs, - aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { - FloppyDriveType type = isa_fdc_get_drive_type(fdc, i); - - if (type < FLOPPY_DRIVE_TYPE_NONE) { - fde_buf[i] = cpu_to_le32(1); /* drive present */ - aml_append(dev, build_fdinfo_aml(i, type)); - } - } - aml_append(dev, aml_name_decl("_FDE", - aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); - - return dev; -} - -static Aml *build_rtc_device_aml(void) -{ - Aml *dev; - Aml *crs; - - dev = aml_device("RTC"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0B00"))); - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0070, 0x0070, 0x10, 0x02)); - aml_append(crs, aml_irq_no_flags(8)); - aml_append(crs, aml_io(AML_DECODE16, 0x0072, 0x0072, 0x02, 0x06)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml *build_kbd_device_aml(void) -{ - Aml *dev; - Aml *crs; - Aml *method; - - dev = aml_device("KBD"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0303"))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0x0f))); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0060, 0x0060, 0x01, 0x01)); - aml_append(crs, aml_io(AML_DECODE16, 0x0064, 0x0064, 0x01, 0x01)); - aml_append(crs, aml_irq_no_flags(1)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml *build_mouse_device_aml(void) -{ - Aml *dev; - Aml *crs; - Aml *method; - - dev = aml_device("MOU"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0F13"))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0x0f))); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_irq_no_flags(12)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml 
*build_lpt_device_aml(void) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - - dev = aml_device("LPT"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0400"))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("LPEN"), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x0378, 0x0378, 0x08, 0x08)); - aml_append(crs, aml_irq_no_flags(7)); - aml_append(dev, aml_name_decl("_CRS", crs)); - - return dev; -} - -static Aml *build_com_device_aml(uint8_t uid) -{ - Aml *dev; - Aml *crs; - Aml *method; - Aml *if_ctx; - Aml *else_ctx; - Aml *zero = aml_int(0); - Aml *is_present = aml_local(0); - const char *enabled_field = "CAEN"; - uint8_t irq = 4; - uint16_t io_port = 0x03F8; - - assert(uid == 1 || uid == 2); - if (uid == 2) { - enabled_field = "CBEN"; - irq = 3; - io_port = 0x02F8; - } - - dev = aml_device("COM%d", uid); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0501"))); - aml_append(dev, aml_name_decl("_UID", aml_int(uid))); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("%s", enabled_field), is_present)); - if_ctx = aml_if(aml_equal(is_present, zero)); - { - aml_append(if_ctx, aml_return(aml_int(0x00))); - } - aml_append(method, if_ctx); - else_ctx = aml_else(); - { - aml_append(else_ctx, aml_return(aml_int(0x0f))); - } - aml_append(method, else_ctx); - aml_append(dev, method); - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, io_port, io_port, 0x00, 0x08)); - aml_append(crs, aml_irq_no_flags(irq)); - aml_append(dev, aml_name_decl("_CRS", 
crs)); - - return dev; -} - -static void build_isa_devices_aml(Aml *table) -{ - ISADevice *fdc = pc_find_fdc0(); - - Aml *scope = aml_scope("_SB.PCI0.ISA"); - - aml_append(scope, build_rtc_device_aml()); - aml_append(scope, build_kbd_device_aml()); - aml_append(scope, build_mouse_device_aml()); - if (fdc) { - aml_append(scope, build_fdc_device_aml(fdc)); - } - aml_append(scope, build_lpt_device_aml()); - aml_append(scope, build_com_device_aml(1)); - aml_append(scope, build_com_device_aml(2)); - - aml_append(table, scope); -} - -static void build_dbg_aml(Aml *table) -{ - Aml *field; - Aml *method; - Aml *while_ctx; - Aml *scope = aml_scope("\\"); - Aml *buf = aml_local(0); - Aml *len = aml_local(1); - Aml *idx = aml_local(2); - - aml_append(scope, - aml_operation_region("DBG", AML_SYSTEM_IO, aml_int(0x0402), 0x01)); - field = aml_field("DBG", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("DBGB", 8)); - aml_append(scope, field); - - method = aml_method("DBUG", 1, AML_NOTSERIALIZED); - - aml_append(method, aml_to_hexstring(aml_arg(0), buf)); - aml_append(method, aml_to_buffer(buf, buf)); - aml_append(method, aml_subtract(aml_sizeof(buf), aml_int(1), len)); - aml_append(method, aml_store(aml_int(0), idx)); - - while_ctx = aml_while(aml_lless(idx, len)); - aml_append(while_ctx, - aml_store(aml_derefof(aml_index(buf, idx)), aml_name("DBGB"))); - aml_append(while_ctx, aml_increment(idx)); - aml_append(method, while_ctx); - - aml_append(method, aml_store(aml_int(0x0A), aml_name("DBGB"))); - aml_append(scope, method); - - aml_append(table, scope); -} - -static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg) -{ - Aml *dev; - Aml *crs; - Aml *method; - uint32_t irqs[] = {5, 10, 11}; - - dev = aml_device("%s", name); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C0F"))); - aml_append(dev, aml_name_decl("_UID", aml_int(uid))); - - crs = aml_resource_template(); - aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, 
AML_ACTIVE_HIGH, - AML_SHARED, irqs, ARRAY_SIZE(irqs))); - aml_append(dev, aml_name_decl("_PRS", crs)); - - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_call1("IQST", reg))); - aml_append(dev, method); - - method = aml_method("_DIS", 0, AML_NOTSERIALIZED); - aml_append(method, aml_or(reg, aml_int(0x80), reg)); - aml_append(dev, method); - - method = aml_method("_CRS", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_call1("IQCR", reg))); - aml_append(dev, method); - - method = aml_method("_SRS", 1, AML_NOTSERIALIZED); - aml_append(method, aml_create_dword_field(aml_arg(0), aml_int(5), "PRRI")); - aml_append(method, aml_store(aml_name("PRRI"), reg)); - aml_append(dev, method); - - return dev; - } - -static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi) -{ - Aml *dev; - Aml *crs; - Aml *method; - uint32_t irqs; - - dev = aml_device("%s", name); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C0F"))); - aml_append(dev, aml_name_decl("_UID", aml_int(uid))); - - crs = aml_resource_template(); - irqs = gsi; - aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH, - AML_SHARED, &irqs, 1)); - aml_append(dev, aml_name_decl("_PRS", crs)); - - aml_append(dev, aml_name_decl("_CRS", crs)); - - /* - * _DIS can be no-op because the interrupt cannot be disabled. 
- */ - method = aml_method("_DIS", 0, AML_NOTSERIALIZED); - aml_append(dev, method); - - method = aml_method("_SRS", 1, AML_NOTSERIALIZED); - aml_append(dev, method); - - return dev; -} - -/* _CRS method - get current settings */ -static Aml *build_iqcr_method(bool is_piix4) -{ - Aml *if_ctx; - uint32_t irqs; - Aml *method = aml_method("IQCR", 1, AML_SERIALIZED); - Aml *crs = aml_resource_template(); - - irqs = 0; - aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, - AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1)); - aml_append(method, aml_name_decl("PRR0", crs)); - - aml_append(method, - aml_create_dword_field(aml_name("PRR0"), aml_int(5), "PRRI")); - - if (is_piix4) { - if_ctx = aml_if(aml_lless(aml_arg(0), aml_int(0x80))); - aml_append(if_ctx, aml_store(aml_arg(0), aml_name("PRRI"))); - aml_append(method, if_ctx); - } else { - aml_append(method, - aml_store(aml_and(aml_arg(0), aml_int(0xF), NULL), - aml_name("PRRI"))); - } - - aml_append(method, aml_return(aml_name("PRR0"))); - return method; -} - -/* _STA method - get status */ -static Aml *build_irq_status_method(void) -{ - Aml *if_ctx; - Aml *method = aml_method("IQST", 1, AML_NOTSERIALIZED); - - if_ctx = aml_if(aml_and(aml_int(0x80), aml_arg(0), NULL)); - aml_append(if_ctx, aml_return(aml_int(0x09))); - aml_append(method, if_ctx); - aml_append(method, aml_return(aml_int(0x0B))); - return method; -} - -static void build_piix4_pci0_int(Aml *table) -{ - Aml *dev; - Aml *crs; - Aml *field; - Aml *method; - uint32_t irqs; - Aml *sb_scope = aml_scope("_SB"); - Aml *pci0_scope = aml_scope("PCI0"); - - aml_append(pci0_scope, build_prt(true)); - aml_append(sb_scope, pci0_scope); - - field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQ0", 8)); - aml_append(field, aml_named_field("PRQ1", 8)); - aml_append(field, aml_named_field("PRQ2", 8)); - aml_append(field, aml_named_field("PRQ3", 8)); - aml_append(sb_scope, field); - - aml_append(sb_scope, 
build_irq_status_method()); - aml_append(sb_scope, build_iqcr_method(true)); - - aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"))); - aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"))); - aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"))); - aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"))); - - dev = aml_device("LNKS"); - { - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C0F"))); - aml_append(dev, aml_name_decl("_UID", aml_int(4))); - - crs = aml_resource_template(); - irqs = 9; - aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, - AML_ACTIVE_HIGH, AML_SHARED, - &irqs, 1)); - aml_append(dev, aml_name_decl("_PRS", crs)); - - /* The SCI cannot be disabled and is always attached to GSI 9, - * so these are no-ops. We only need this link to override the - * polarity to active high and match the content of the MADT. - */ - method = aml_method("_STA", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0x0b))); - aml_append(dev, method); - - method = aml_method("_DIS", 0, AML_NOTSERIALIZED); - aml_append(dev, method); - - method = aml_method("_CRS", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_name("_PRS"))); - aml_append(dev, method); - - method = aml_method("_SRS", 1, AML_NOTSERIALIZED); - aml_append(dev, method); - } - aml_append(sb_scope, dev); - - aml_append(table, sb_scope); -} - -static void append_q35_prt_entry(Aml *ctx, uint32_t nr, const char *name) -{ - int i; - int head; - Aml *pkg; - char base = name[3] < 'E' ? 
'A' : 'E'; - char *s = g_strdup(name); - Aml *a_nr = aml_int((nr << 16) | 0xffff); - - assert(strlen(s) == 4); - - head = name[3] - base; - for (i = 0; i < 4; i++) { - if (head + i > 3) { - head = i * -1; - } - s[3] = base + head + i; - pkg = aml_package(4); - aml_append(pkg, a_nr); - aml_append(pkg, aml_int(i)); - aml_append(pkg, aml_name("%s", s)); - aml_append(pkg, aml_int(0)); - aml_append(ctx, pkg); - } - g_free(s); -} - -static Aml *build_q35_routing_table(const char *str) -{ - int i; - Aml *pkg; - char *name = g_strdup_printf("%s ", str); - - pkg = aml_package(128); - for (i = 0; i < 0x18; i++) { - name[3] = 'E' + (i & 0x3); - append_q35_prt_entry(pkg, i, name); - } - - name[3] = 'E'; - append_q35_prt_entry(pkg, 0x18, name); - - /* INTA -> PIRQA for slots 25 - 29 (0x19 - 0x1d); slots 30 and 31 are added separately below, see the default value of D<N>IR */ - for (i = 0x0019; i < 0x1e; i++) { - name[3] = 'A'; - append_q35_prt_entry(pkg, i, name); - } - - /* PCIe->PCI bridge (slot 0x1e). use PIRQ[E-H]; slot 0x1f starts at PIRQA again */ - name[3] = 'E'; - append_q35_prt_entry(pkg, 0x1e, name); - name[3] = 'A'; - append_q35_prt_entry(pkg, 0x1f, name); - - g_free(name); - return pkg; -} - -static void build_q35_pci0_int(Aml *table) -{ - Aml *field; - Aml *method; - Aml *sb_scope = aml_scope("_SB"); - Aml *pci0_scope = aml_scope("PCI0"); - - /* Zero => PIC mode, One => APIC Mode */ - aml_append(table, aml_name_decl("PICF", aml_int(0))); - method = aml_method("_PIC", 1, AML_NOTSERIALIZED); - { - aml_append(method, aml_store(aml_arg(0), aml_name("PICF"))); - } - aml_append(table, method); - - aml_append(pci0_scope, - aml_name_decl("PRTP", build_q35_routing_table("LNK"))); - aml_append(pci0_scope, - aml_name_decl("PRTA", build_q35_routing_table("GSI"))); - - method = aml_method("_PRT", 0, AML_NOTSERIALIZED); - { - Aml *if_ctx; - Aml *else_ctx; - - /* PCI IRQ routing table, example from ACPI 2.0a specification, - section 6.2.8.1 */ - /* Note: we provide the same info as the PCI routing - table of the Bochs BIOS */ - if_ctx = aml_if(aml_equal(aml_name("PICF"), 
aml_int(0))); - aml_append(if_ctx, aml_return(aml_name("PRTP"))); - aml_append(method, if_ctx); - else_ctx = aml_else(); - aml_append(else_ctx, aml_return(aml_name("PRTA"))); - aml_append(method, else_ctx); - } - aml_append(pci0_scope, method); - aml_append(sb_scope, pci0_scope); - - field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQA", 8)); - aml_append(field, aml_named_field("PRQB", 8)); - aml_append(field, aml_named_field("PRQC", 8)); - aml_append(field, aml_named_field("PRQD", 8)); - aml_append(field, aml_reserved_field(0x20)); - aml_append(field, aml_named_field("PRQE", 8)); - aml_append(field, aml_named_field("PRQF", 8)); - aml_append(field, aml_named_field("PRQG", 8)); - aml_append(field, aml_named_field("PRQH", 8)); - aml_append(sb_scope, field); - - aml_append(sb_scope, build_irq_status_method()); - aml_append(sb_scope, build_iqcr_method(false)); - - aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"))); - aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"))); - aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"))); - aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"))); - aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"))); - aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"))); - aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"))); - aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"))); - - aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10)); - aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11)); - aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12)); - aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13)); - aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14)); - aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15)); - aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16)); - aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 
0x17)); - - aml_append(table, sb_scope); -} - -static void build_q35_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Aml *field; - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000))); - - /* ICH9 PCI to ISA irq remapping */ - aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG, - aml_int(0x60), 0x0C)); - - aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG, - aml_int(0x80), 0x02)); - field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("COMA", 3)); - aml_append(field, aml_reserved_field(1)); - aml_append(field, aml_named_field("COMB", 3)); - aml_append(field, aml_reserved_field(1)); - aml_append(field, aml_named_field("LPTD", 2)); - aml_append(dev, field); - - aml_append(dev, aml_operation_region("LPCE", AML_PCI_CONFIG, - aml_int(0x82), 0x02)); - /* enable bits */ - field = aml_field("LPCE", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("CAEN", 1)); - aml_append(field, aml_named_field("CBEN", 1)); - aml_append(field, aml_named_field("LPEN", 1)); - aml_append(dev, field); - - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_pm(Aml *table) -{ - Aml *dev; - Aml *scope; - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("PX13"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003))); - - aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG, - aml_int(0x00), 0xff)); - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Aml *field; - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000))); - - /* PIIX PCI to ISA irq remapping */ - aml_append(dev, aml_operation_region("P40C", AML_PCI_CONFIG, - aml_int(0x60), 0x04)); - /* enable bits */ - field = aml_field("^PX13.P13C", AML_ANY_ACC, AML_NOLOCK, 
AML_PRESERVE); - /* Offset(0x5f),, 7, */ - aml_append(field, aml_reserved_field(0x2f8)); - aml_append(field, aml_reserved_field(7)); - aml_append(field, aml_named_field("LPEN", 1)); - /* Offset(0x67),, 3, */ - aml_append(field, aml_reserved_field(0x38)); - aml_append(field, aml_reserved_field(3)); - aml_append(field, aml_named_field("CAEN", 1)); - aml_append(field, aml_reserved_field(3)); - aml_append(field, aml_named_field("CBEN", 1)); - aml_append(dev, field); - - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_pci_hotplug(Aml *table) -{ - Aml *scope; - Aml *field; - Aml *method; - - scope = aml_scope("_SB.PCI0"); - - aml_append(scope, - aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x08)); - field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("PCIU", 32)); - aml_append(field, aml_named_field("PCID", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04)); - field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("B0EJ", 32)); - aml_append(scope, field); - - aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x04)); - field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("BNUM", 32)); - aml_append(scope, field); - - aml_append(scope, aml_mutex("BLCK", 0)); - - method = aml_method("PCEJ", 2, AML_NOTSERIALIZED); - aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF)); - aml_append(method, aml_store(aml_arg(0), aml_name("BNUM"))); - aml_append(method, - aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ"))); - aml_append(method, aml_release(aml_name("BLCK"))); - aml_append(method, aml_return(aml_int(0))); - aml_append(scope, method); - - aml_append(table, scope); -} - -static Aml *build_q35_osc_method(void) -{ - Aml *if_ctx; - Aml *if_ctx2; 
- Aml *else_ctx; - Aml *method; - Aml *a_cwd1 = aml_name("CDW1"); - Aml *a_ctrl = aml_name("CTRL"); - - method = aml_method("_OSC", 4, AML_NOTSERIALIZED); - aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); - - if_ctx = aml_if(aml_equal( - aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766"))); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); - aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - - aml_append(if_ctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); - aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl)); - - /* - * Always allow native PME, AER (no dependencies) - * Never allow SHPC (no SHPC controller in this system) - */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1D), a_ctrl)); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); - /* Unknown revision */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl))); - /* Capabilities bits were masked */ - aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1)); - aml_append(if_ctx, if_ctx2); - - /* Update DWORD3 in the buffer */ - aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3"))); - aml_append(method, if_ctx); - - else_ctx = aml_else(); - /* Unrecognized UUID */ - aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1)); - aml_append(method, else_ctx); - - aml_append(method, aml_return(aml_arg(3))); - return method; -} - -static void -build_dsdt(GArray *table_data, GArray *linker, - AcpiPmInfo *pm, AcpiMiscInfo *misc, - PcPciInfo *pci, MachineState *machine) -{ - CrsRangeEntry *entry; - Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; - GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); - GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); - PCMachineState *pcms = PC_MACHINE(machine); - uint32_t nr_mem = machine->ram_slots; - int 
root_bus_limit = 0xFF; - PCIBus *bus = NULL; - int i; - - dsdt = init_aml_allocator(); - - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); - - build_dbg_aml(dsdt); - if (misc->is_piix4) { - sb_scope = aml_scope("_SB"); - dev = aml_device("PCI0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); - aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(1))); - aml_append(sb_scope, dev); - aml_append(dsdt, sb_scope); - - build_hpet_aml(dsdt); - build_piix4_pm(dsdt); - build_piix4_isa_bridge(dsdt); - build_isa_devices_aml(dsdt); - build_piix4_pci_hotplug(dsdt); - build_piix4_pci0_int(dsdt); - } else { - sb_scope = aml_scope("_SB"); - aml_append(sb_scope, - aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x0c)); - aml_append(sb_scope, - aml_operation_region("PCSB", AML_SYSTEM_IO, aml_int(0xae0c), 0x01)); - field = aml_field("PCSB", AML_ANY_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); - aml_append(field, aml_named_field("PCIB", 8)); - aml_append(sb_scope, field); - aml_append(dsdt, sb_scope); - - sb_scope = aml_scope("_SB"); - dev = aml_device("PCI0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); - aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(1))); - aml_append(dev, aml_name_decl("SUPP", aml_int(0))); - aml_append(dev, aml_name_decl("CTRL", aml_int(0))); - aml_append(dev, build_q35_osc_method()); - aml_append(sb_scope, dev); - aml_append(dsdt, sb_scope); - - build_hpet_aml(dsdt); - build_q35_isa_bridge(dsdt); - build_isa_devices_aml(dsdt); - build_q35_pci0_int(dsdt); - } - - build_cpu_hotplug_aml(dsdt); - build_memory_hotplug_aml(dsdt, nr_mem, pm->mem_hp_io_base, - pm->mem_hp_io_len); - - scope = aml_scope("_GPE"); - { - aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - - aml_append(scope, aml_method("_L00", 0, 
AML_NOTSERIALIZED)); - - if (misc->is_piix4) { - method = aml_method("_E01", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); - aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); - aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); - aml_append(scope, method); - } else { - aml_append(scope, aml_method("_L01", 0, AML_NOTSERIALIZED)); - } - - method = aml_method("_E02", 0, AML_NOTSERIALIZED); - aml_append(method, aml_call0("\\_SB." CPU_SCAN_METHOD)); - aml_append(scope, method); - - method = aml_method("_E03", 0, AML_NOTSERIALIZED); - aml_append(method, aml_call0(MEMORY_HOTPLUG_HANDLER_PATH)); - aml_append(scope, method); - - aml_append(scope, aml_method("_L04", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L05", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L06", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L07", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L08", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L09", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0A", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0B", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0C", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0D", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0E", 0, AML_NOTSERIALIZED)); - aml_append(scope, aml_method("_L0F", 0, AML_NOTSERIALIZED)); - } - aml_append(dsdt, scope); - - bus = PC_MACHINE(machine)->bus; - if (bus) { - QLIST_FOREACH(bus, &bus->child, sibling) { - uint8_t bus_num = pci_bus_num(bus); - uint8_t numa_node = pci_bus_numa_node(bus); - - /* look only for expander root buses */ - if (!pci_bus_is_root(bus)) { - continue; - } - - if (bus_num < root_bus_limit) { - root_bus_limit = bus_num - 1; - } - - scope = aml_scope("\\_SB"); - dev = aml_device("PC%.02X", bus_num); - aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); - aml_append(dev, aml_name_decl("_HID", 
aml_eisaid("PNP0A03"))); - aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); - - if (numa_node != NUMA_NODE_UNASSIGNED) { - aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); - } - - aml_append(dev, build_prt(false)); - crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), - io_ranges, mem_ranges); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); - aml_append(dsdt, scope); - } - } - - scope = aml_scope("\\_SB.PCI0"); - /* build PCI0._CRS */ - crs = aml_resource_template(); - aml_append(crs, - aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, - 0x0000, 0x0, root_bus_limit, - 0x0000, root_bus_limit + 1)); - aml_append(crs, aml_io(AML_DECODE16, 0x0CF8, 0x0CF8, 0x01, 0x08)); - - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8)); - - crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF); - for (i = 0; i < io_ranges->len; i++) { - entry = g_ptr_array_index(io_ranges, i); - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0x0000, entry->base, entry->limit, - 0x0000, entry->limit - entry->base + 1)); - } - - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_CACHEABLE, AML_READ_WRITE, - 0, 0x000A0000, 0x000BFFFF, 0, 0x00020000)); - - crs_replace_with_free_ranges(mem_ranges, pci->w32.begin, pci->w32.end - 1); - for (i = 0; i < mem_ranges->len; i++) { - entry = g_ptr_array_index(mem_ranges, i); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_NON_CACHEABLE, AML_READ_WRITE, - 0, entry->base, entry->limit, - 0, entry->limit - entry->base + 1)); - } - - if (pci->w64.begin) { - aml_append(crs, - aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_CACHEABLE, AML_READ_WRITE, - 0, pci->w64.begin, pci->w64.end - 1, 0, - pci->w64.end - pci->w64.begin)); - } - - if (misc->tpm_version != 
TPM_VERSION_UNSPEC) { - aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, - TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); - } - aml_append(scope, aml_name_decl("_CRS", crs)); - - /* reserve GPE0 block resources */ - dev = aml_device("GPE0"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); - aml_append(dev, aml_name_decl("_UID", aml_string("GPE0 resources"))); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); - - g_ptr_array_free(io_ranges, true); - g_ptr_array_free(mem_ranges, true); - - /* reserve PCIHP resources */ - if (pm->pcihp_io_len) { - dev = aml_device("PHPR"); - aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); - aml_append(dev, - aml_name_decl("_UID", aml_string("PCI Hotplug resources"))); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, pm->pcihp_io_base, pm->pcihp_io_base, 1, - pm->pcihp_io_len) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); - } - aml_append(dsdt, scope); - - /* create S3_ / S4_ / S5_ packages if necessary */ - scope = aml_scope("\\"); - if (!pm->s3_disabled) { - pkg = aml_package(4); - aml_append(pkg, aml_int(1)); /* PM1a_CNT.SLP_TYP */ - aml_append(pkg, aml_int(1)); /* PM1b_CNT.SLP_TYP, FIXME: not impl. */ - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(scope, aml_name_decl("_S3", pkg)); - } - - if (!pm->s4_disabled) { - pkg = aml_package(4); - aml_append(pkg, aml_int(pm->s4_val)); /* PM1a_CNT.SLP_TYP */ - /* PM1b_CNT.SLP_TYP, FIXME: not impl. 
*/ - aml_append(pkg, aml_int(pm->s4_val)); - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(scope, aml_name_decl("_S4", pkg)); - } - - pkg = aml_package(4); - aml_append(pkg, aml_int(0)); /* PM1a_CNT.SLP_TYP */ - aml_append(pkg, aml_int(0)); /* PM1b_CNT.SLP_TYP not impl. */ - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(pkg, aml_int(0)); /* reserved */ - aml_append(scope, aml_name_decl("_S5", pkg)); - aml_append(dsdt, scope); - - /* create fw_cfg node, unconditionally */ - { - /* when using port i/o, the 8-bit data register *always* overlaps - * with half of the 16-bit control register. Hence, the total size - * of the i/o region used is FW_CFG_CTL_SIZE; when using DMA, the - * DMA control register is located at FW_CFG_DMA_IO_BASE + 4 */ - uint8_t io_size = object_property_get_bool(OBJECT(pcms->fw_cfg), - "dma_enabled", NULL) ? - ROUND_UP(FW_CFG_CTL_SIZE, 4) + sizeof(dma_addr_t) : - FW_CFG_CTL_SIZE; - - scope = aml_scope("\\_SB.PCI0"); - dev = aml_device("FWCF"); - - aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); - - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, FW_CFG_IO_BASE, FW_CFG_IO_BASE, 0x01, io_size) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - - aml_append(scope, dev); - aml_append(dsdt, scope); - } - - if (misc->applesmc_io_base) { - scope = aml_scope("\\_SB.PCI0.ISA"); - dev = aml_device("SMC"); - - aml_append(dev, aml_name_decl("_HID", aml_eisaid("APP0001"))); - /* device present, functioning, decoding, not shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); - - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, misc->applesmc_io_base, misc->applesmc_io_base, - 0x01, APPLESMC_MAX_DATA_LENGTH) - ); - aml_append(crs, aml_irq_no_flags(6)); - aml_append(dev, aml_name_decl("_CRS", 
crs)); - - aml_append(scope, dev); - aml_append(dsdt, scope); - } - - if (misc->pvpanic_port) { - scope = aml_scope("\\_SB.PCI0.ISA"); - - dev = aml_device("PEVT"); - aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0001"))); - - crs = aml_resource_template(); - aml_append(crs, - aml_io(AML_DECODE16, misc->pvpanic_port, misc->pvpanic_port, 1, 1) - ); - aml_append(dev, aml_name_decl("_CRS", crs)); - - aml_append(dev, aml_operation_region("PEOR", AML_SYSTEM_IO, - aml_int(misc->pvpanic_port), 1)); - field = aml_field("PEOR", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PEPT", 8)); - aml_append(dev, field); - - /* device present, functioning, decoding, shown in UI */ - aml_append(dev, aml_name_decl("_STA", aml_int(0xF))); - - method = aml_method("RDPT", 0, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_name("PEPT"), aml_local(0))); - aml_append(method, aml_return(aml_local(0))); - aml_append(dev, method); - - method = aml_method("WRPT", 1, AML_NOTSERIALIZED); - aml_append(method, aml_store(aml_arg(0), aml_name("PEPT"))); - aml_append(dev, method); - - aml_append(scope, dev); - aml_append(dsdt, scope); - } - - sb_scope = aml_scope("\\_SB"); - { - build_processor_devices(sb_scope, machine, pm); - - build_memory_devices(sb_scope, nr_mem, pm->mem_hp_io_base, - pm->mem_hp_io_len); - - { - Object *pci_host; - PCIBus *bus = NULL; - - pci_host = acpi_get_i386_pci_host(); - if (pci_host) { - bus = PCI_HOST_BRIDGE(pci_host)->bus; - } - - if (bus) { - Aml *scope = aml_scope("PCI0"); - /* Scan all PCI buses. Generate tables to support hotplug. 
*/ - build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); - - if (misc->tpm_version != TPM_VERSION_UNSPEC) { - dev = aml_device("ISA.TPM"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31"))); - aml_append(dev, aml_name_decl("_STA", aml_int(0xF))); - crs = aml_resource_template(); - aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, - TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); - /* - FIXME: TPM_TIS_IRQ=5 conflicts with PNP0C0F irqs, - Rewrite to take IRQ from TPM device model and - fix default IRQ value there to use some unused IRQ - */ - /* aml_append(crs, aml_irq_no_flags(TPM_TIS_IRQ)); */ - aml_append(dev, aml_name_decl("_CRS", crs)); - aml_append(scope, dev); - } - - aml_append(sb_scope, scope); - } - } - aml_append(dsdt, sb_scope); - } - - /* copy AML table into ACPI tables blob and patch header there */ - g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 1, NULL, NULL); - free_aml_allocator(); -} - -static void -build_hpet(GArray *table_data, GArray *linker) -{ - Acpi20Hpet *hpet; - - hpet = acpi_data_push(table_data, sizeof(*hpet)); - /* Note timer_block_id value must be kept in sync with value advertised by - * emulated hpet - */ - hpet->timer_block_id = cpu_to_le32(0x8086a201); - hpet->addr.address = cpu_to_le64(HPET_BASE); - build_header(linker, table_data, - (void *)hpet, "HPET", sizeof(*hpet), 1, NULL, NULL); -} - -static void -build_tpm_tcpa(GArray *table_data, GArray *linker, GArray *tcpalog) -{ - Acpi20Tcpa *tcpa = acpi_data_push(table_data, sizeof *tcpa); - uint64_t log_area_start_address = acpi_data_len(tcpalog); - - tcpa->platform_class = cpu_to_le16(TPM_TCPA_ACPI_CLASS_CLIENT); - tcpa->log_area_minimum_length = cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); - tcpa->log_area_start_address = cpu_to_le64(log_area_start_address); - - bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, 1, - 
false /* high memory */); - - /* log area start address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TPMLOG_FILE, - table_data, &tcpa->log_area_start_address, - sizeof(tcpa->log_area_start_address)); - - build_header(linker, table_data, - (void *)tcpa, "TCPA", sizeof(*tcpa), 2, NULL, NULL); - - acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); -} - -static void -build_tpm2(GArray *table_data, GArray *linker) -{ - Acpi20TPM2 *tpm2_ptr; - - tpm2_ptr = acpi_data_push(table_data, sizeof *tpm2_ptr); - - tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); - tpm2_ptr->control_area_address = cpu_to_le64(0); - tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); - - build_header(linker, table_data, - (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); -} - -typedef enum { - MEM_AFFINITY_NOFLAGS = 0, - MEM_AFFINITY_ENABLED = (1 << 0), - MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), - MEM_AFFINITY_NON_VOLATILE = (1 << 2), -} MemoryAffinityFlags; - -static void -acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, - uint64_t len, int node, MemoryAffinityFlags flags) -{ - numamem->type = ACPI_SRAT_MEMORY; - numamem->length = sizeof(*numamem); - memset(numamem->proximity, 0, 4); - numamem->proximity[0] = node; - numamem->flags = cpu_to_le32(flags); - numamem->base_addr = cpu_to_le64(base); - numamem->range_length = cpu_to_le64(len); -} - -static void -build_srat(GArray *table_data, GArray *linker, MachineState *machine) -{ - AcpiSystemResourceAffinityTable *srat; - AcpiSratProcessorAffinity *core; - AcpiSratMemoryAffinity *numamem; - - int i; - uint64_t curnode; - int srat_start, numa_start, slots; - uint64_t mem_len, mem_base, next_base; - MachineClass *mc = MACHINE_GET_CLASS(machine); - CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); - PCMachineState *pcms = PC_MACHINE(machine); - ram_addr_t hotplugabble_address_space_size = - 
object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE, - NULL); - - srat_start = table_data->len; - - srat = acpi_data_push(table_data, sizeof *srat); - srat->reserved1 = cpu_to_le32(1); - - for (i = 0; i < apic_ids->len; i++) { - int apic_id = apic_ids->cpus[i].arch_id; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR; - core->length = sizeof(*core); - core->local_apic_id = apic_id; - curnode = pcms->node_cpu[apic_id]; - core->proximity_lo = curnode; - memset(core->proximity_hi, 0, 3); - core->local_sapic_eid = 0; - core->flags = cpu_to_le32(1); - } - - - /* the memory map is a bit tricky, it contains at least one hole - * from 640k-1M and possibly another one from 3.5G-4G. - */ - next_base = 0; - numa_start = table_data->len; - - numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 640*1024, 0, MEM_AFFINITY_ENABLED); - next_base = 1024 * 1024; - for (i = 1; i < pcms->numa_nodes + 1; ++i) { - mem_base = next_base; - mem_len = pcms->node_mem[i - 1]; - if (i == 1) { - mem_len -= 1024 * 1024; - } - next_base = mem_base + mem_len; - - /* Cut out the ACPI_PCI hole */ - if (mem_base <= pcms->below_4g_mem_size && - next_base > pcms->below_4g_mem_size) { - mem_len -= next_base - pcms->below_4g_mem_size; - if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); - } - mem_base = 1ULL << 32; - mem_len = next_base - pcms->below_4g_mem_size; - next_base += (1ULL << 32) - pcms->below_4g_mem_size; - } - numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); - } - slots = (table_data->len - numa_start) / sizeof *numamem; - for (; slots < pcms->numa_nodes + 2; slots++) { - numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); - } - - /* - 
* Entry is required for Windows to enable memory hotplug in OS. - * Memory devices may override proximity set by this entry, - * providing _PXM method if necessary. - */ - if (hotplugabble_address_space_size) { - numamem = acpi_data_push(table_data, sizeof *numamem); - acpi_build_srat_memory(numamem, pcms->hotplug_memory.base, - hotplugabble_address_space_size, 0, - MEM_AFFINITY_HOTPLUGGABLE | - MEM_AFFINITY_ENABLED); - } - - build_header(linker, table_data, - (void *)(table_data->data + srat_start), - "SRAT", - table_data->len - srat_start, 1, NULL, NULL); - g_free(apic_ids); -} - -static void -build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) -{ - AcpiTableMcfg *mcfg; - const char *sig; - int len = sizeof(*mcfg) + 1 * sizeof(mcfg->allocation[0]); - - mcfg = acpi_data_push(table_data, len); - mcfg->allocation[0].address = cpu_to_le64(info->mcfg_base); - /* Only a single allocation so no need to play with segments */ - mcfg->allocation[0].pci_segment = cpu_to_le16(0); - mcfg->allocation[0].start_bus_number = 0; - mcfg->allocation[0].end_bus_number = PCIE_MMCFG_BUS(info->mcfg_size - 1); - - /* MCFG is used for ECAM which can be enabled or disabled by guest. - * To avoid table size changes (which create migration issues), - * always create the table even if there are no allocations, - * but set the signature to a reserved value in this case. - * ACPI spec requires OSPMs to ignore such tables. 
- */ - if (info->mcfg_base == PCIE_BASE_ADDR_UNMAPPED) { - /* Reserved signature: ignored by OSPM */ - sig = "QEMU"; - } else { - sig = "MCFG"; - } - build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL); -} - -static void -build_dmar_q35(GArray *table_data, GArray *linker) -{ - int dmar_start = table_data->len; - - AcpiTableDmar *dmar; - AcpiDmarHardwareUnit *drhd; - - dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; - dmar->flags = 0; /* No intr_remap for now */ - - /* DMAR Remapping Hardware Unit Definition structure */ - drhd = acpi_data_push(table_data, sizeof(*drhd)); - drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ - drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; - drhd->pci_segment = cpu_to_le16(0); - drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); - - build_header(linker, table_data, (void *)(table_data->data + dmar_start), - "DMAR", table_data->len - dmar_start, 1, NULL, NULL); -} - -static GArray * -build_rsdp(GArray *rsdp_table, GArray *linker, unsigned rsdt) -{ - AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 16, - true /* fseg memory */); - - memcpy(&rsdp->signature, "RSD PTR ", 8); - memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6); - rsdp->rsdt_physical_address = cpu_to_le32(rsdt); - /* Address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, - ACPI_BUILD_TABLE_FILE, - rsdp_table, &rsdp->rsdt_physical_address, - sizeof rsdp->rsdt_physical_address); - rsdp->checksum = 0; - /* Checksum to be filled by Guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - rsdp_table, rsdp, sizeof *rsdp, - &rsdp->checksum); - - return rsdp_table; -} - -typedef -struct AcpiBuildState { - /* Copy of table in RAM (for patching). 
*/ - MemoryRegion *table_mr; - /* Is table patched? */ - uint8_t patched; - void *rsdp; - MemoryRegion *rsdp_mr; - MemoryRegion *linker_mr; -} AcpiBuildState; - -static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) -{ - Object *pci_host; - QObject *o; - - pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); - - o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); - if (!o) { - return false; - } - mcfg->mcfg_base = qint_get_int(qobject_to_qint(o)); - qobject_decref(o); - - o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_SIZE, NULL); - assert(o); - mcfg->mcfg_size = qint_get_int(qobject_to_qint(o)); - qobject_decref(o); - return true; -} - -static bool acpi_has_iommu(void) -{ - bool ambiguous; - Object *intel_iommu; - - intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, - &ambiguous); - return intel_iommu && !ambiguous; -} - -static -void acpi_build(AcpiBuildTables *tables, MachineState *machine) -{ - PCMachineState *pcms = PC_MACHINE(machine); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - GArray *table_offsets; - unsigned facs, dsdt, rsdt, fadt; - AcpiPmInfo pm; - AcpiMiscInfo misc; - AcpiMcfgInfo mcfg; - PcPciInfo pci; - uint8_t *u; - size_t aml_len = 0; - GArray *tables_blob = tables->table_data; - AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; - - acpi_get_pm_info(&pm); - acpi_get_misc_info(&misc); - acpi_get_pci_info(&pci); - acpi_get_slic_oem(&slic_oem); - - table_offsets = g_array_new(false, true /* clear */, - sizeof(uint32_t)); - ACPI_BUILD_DPRINTF("init ACPI tables\n"); - - bios_linker_loader_alloc(tables->linker, ACPI_BUILD_TABLE_FILE, - 64 /* Ensure FACS is aligned */, - false /* high memory */); - - /* - * FACS is pointed to by FADT. - * We place it first since it's the only table that has alignment - * requirements. 
- */ - facs = tables_blob->len; - build_facs(tables_blob, tables->linker); - - /* DSDT is pointed to by FADT */ - dsdt = tables_blob->len; - build_dsdt(tables_blob, tables->linker, &pm, &misc, &pci, machine); - - /* Count the size of the DSDT and SSDT, we will need it for legacy - * sizing of ACPI tables. - */ - aml_len += tables_blob->len - dsdt; - - /* ACPI tables pointed to by RSDT */ - fadt = tables_blob->len; - acpi_add_table(table_offsets, tables_blob); - build_fadt(tables_blob, tables->linker, &pm, facs, dsdt, - slic_oem.id, slic_oem.table_id); - aml_len += tables_blob->len - fadt; - - acpi_add_table(table_offsets, tables_blob); - build_madt(tables_blob, tables->linker, pcms); - - if (misc.has_hpet) { - acpi_add_table(table_offsets, tables_blob); - build_hpet(tables_blob, tables->linker); - } - if (misc.tpm_version != TPM_VERSION_UNSPEC) { - acpi_add_table(table_offsets, tables_blob); - build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); - - if (misc.tpm_version == TPM_VERSION_2_0) { - acpi_add_table(table_offsets, tables_blob); - build_tpm2(tables_blob, tables->linker); - } - } - if (pcms->numa_nodes) { - acpi_add_table(table_offsets, tables_blob); - build_srat(tables_blob, tables->linker, machine); - } - if (acpi_get_mcfg(&mcfg)) { - acpi_add_table(table_offsets, tables_blob); - build_mcfg_q35(tables_blob, tables->linker, &mcfg); - } - if (acpi_has_iommu()) { - acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker); - } - if (pcms->acpi_nvdimm_state.is_enabled) { - nvdimm_build_acpi(table_offsets, tables_blob, tables->linker); - } - - /* Add tables supplied by user (if any) */ - for (u = acpi_table_first(); u; u = acpi_table_next(u)) { - unsigned len = acpi_table_len(u); - - acpi_add_table(table_offsets, tables_blob); - g_array_append_vals(tables_blob, u, len); - } - - /* RSDT is pointed to by RSDP */ - rsdt = tables_blob->len; - build_rsdt(tables_blob, tables->linker, table_offsets, - slic_oem.id, 
slic_oem.table_id); - - /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, rsdt); - - /* We'll expose it all to Guest so we want to reduce - * chance of size changes. - * - * We used to align the tables to 4k, but of course this would - * too simple to be enough. 4k turned out to be too small an - * alignment very soon, and in fact it is almost impossible to - * keep the table size stable for all (max_cpus, max_memory_slots) - * combinations. So the table size is always 64k for pc-i440fx-2.1 - * and we give an error if the table grows beyond that limit. - * - * We still have the problem of migrating from "-M pc-i440fx-2.0". For - * that, we exploit the fact that QEMU 2.1 generates _smaller_ tables - * than 2.0 and we can always pad the smaller tables with zeros. We can - * then use the exact size of the 2.0 tables. - * - * All this is for PIIX4, since QEMU 2.0 didn't support Q35 migration. - */ - if (pcmc->legacy_acpi_table_size) { - /* Subtracting aml_len gives the size of fixed tables. Then add the - * size of the PIIX4 DSDT/SSDT in QEMU 2.0. - */ - int legacy_aml_len = - pcmc->legacy_acpi_table_size + - ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus; - int legacy_table_size = - ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, - ACPI_BUILD_ALIGN_SIZE); - if (tables_blob->len > legacy_table_size) { - /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ - error_report("Warning: migration may not work."); - } - g_array_set_size(tables_blob, legacy_table_size); - } else { - /* Make sure we have a buffer in case we need to resize the tables. */ - if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { - /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ - error_report("Warning: ACPI tables are larger than 64k."); - error_report("Warning: migration may not work."); - error_report("Warning: please remove CPUs, NUMA nodes, " - "memory slots or PCI bridges."); - } - acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE); - } - - acpi_align_size(tables->linker, ACPI_BUILD_ALIGN_SIZE); - - /* Cleanup memory that's no longer used. */ - g_array_free(table_offsets, true); -} - -static void acpi_ram_update(MemoryRegion *mr, GArray *data) -{ - uint32_t size = acpi_data_len(data); - - /* Make sure RAM size is correct - in case it got changed e.g. by migration */ - memory_region_ram_resize(mr, size, &error_abort); - - memcpy(memory_region_get_ram_ptr(mr), data->data, size); - memory_region_set_dirty(mr, 0, size); -} - -static void acpi_build_update(void *build_opaque) -{ - AcpiBuildState *build_state = build_opaque; - AcpiBuildTables tables; - - /* No state to update or already patched? Nothing to do. */ - if (!build_state || build_state->patched) { - return; - } - build_state->patched = 1; - - acpi_build_tables_init(&tables); - - acpi_build(&tables, MACHINE(qdev_get_machine())); - - acpi_ram_update(build_state->table_mr, tables.table_data); - - if (build_state->rsdp) { - memcpy(build_state->rsdp, tables.rsdp->data, acpi_data_len(tables.rsdp)); - } else { - acpi_ram_update(build_state->rsdp_mr, tables.rsdp); - } - - acpi_ram_update(build_state->linker_mr, tables.linker); - acpi_build_tables_cleanup(&tables, true); -} - -static void acpi_build_reset(void *build_opaque) -{ - AcpiBuildState *build_state = build_opaque; - build_state->patched = 0; -} - -static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, - GArray *blob, const char *name, - uint64_t max_size) -{ - return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, - name, acpi_build_update, build_state); -} - -static const VMStateDescription vmstate_acpi_build = { - .name = "acpi_build", - .version_id = 1, - .minimum_version_id = 1, - .fields = 
(VMStateField[]) { - VMSTATE_UINT8(patched, AcpiBuildState), - VMSTATE_END_OF_LIST() - }, -}; - -void acpi_setup(void) -{ - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - AcpiBuildTables tables; - AcpiBuildState *build_state; - - if (!pcms->fw_cfg) { - ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); - return; - } - - if (!pcmc->has_acpi_build) { - ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); - return; - } - - if (!acpi_enabled) { - ACPI_BUILD_DPRINTF("ACPI disabled. Bailing out.\n"); - return; - } - - build_state = g_malloc0(sizeof *build_state); - - acpi_set_pci_info(); - - acpi_build_tables_init(&tables); - acpi_build(&tables, MACHINE(pcms)); - - /* Now expose it all to Guest */ - build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data, - ACPI_BUILD_TABLE_FILE, - ACPI_BUILD_TABLE_MAX_SIZE); - assert(build_state->table_mr != NULL); - - build_state->linker_mr = - acpi_add_rom_blob(build_state, tables.linker, "etc/table-loader", 0); - - fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, - tables.tcpalog->data, acpi_data_len(tables.tcpalog)); - - if (!pcmc->rsdp_in_ram) { - /* - * Keep for compatibility with old machine types. - * Though RSDP is small, its contents isn't immutable, so - * we'll update it along with the rest of tables on guest access. 
- */ - uint32_t rsdp_size = acpi_data_len(tables.rsdp); - - build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); - fw_cfg_add_file_callback(pcms->fw_cfg, ACPI_BUILD_RSDP_FILE, - acpi_build_update, build_state, - build_state->rsdp, rsdp_size); - build_state->rsdp_mr = NULL; - } else { - build_state->rsdp = NULL; - build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp, - ACPI_BUILD_RSDP_FILE, 0); - } - - qemu_register_reset(acpi_build_reset, build_state); - acpi_build_reset(build_state); - vmstate_register(NULL, 0, &vmstate_acpi_build, build_state); - - /* Cleanup tables but don't free the memory: we track it - * in build_state. - */ - acpi_build_tables_cleanup(&tables, false); -} diff --git a/qemu/hw/i386/acpi-build.h b/qemu/hw/i386/acpi-build.h deleted file mode 100644 index 007332e51..000000000 --- a/qemu/hw/i386/acpi-build.h +++ /dev/null @@ -1,7 +0,0 @@ - -#ifndef HW_I386_ACPI_BUILD_H -#define HW_I386_ACPI_BUILD_H - -void acpi_setup(void); - -#endif diff --git a/qemu/hw/i386/intel_iommu.c b/qemu/hw/i386/intel_iommu.c deleted file mode 100644 index 347718f93..000000000 --- a/qemu/hw/i386/intel_iommu.c +++ /dev/null @@ -1,2057 +0,0 @@ -/* - * QEMU emulation of an Intel IOMMU (VT-d) - * (DMA Remapping device) - * - * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> - * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- - * You should have received a copy of the GNU General Public License along - * with this program; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "exec/address-spaces.h" -#include "intel_iommu_internal.h" -#include "hw/pci/pci.h" - -/*#define DEBUG_INTEL_IOMMU*/ -#ifdef DEBUG_INTEL_IOMMU -enum { - DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, - DEBUG_CACHE, -}; -#define VTD_DBGBIT(x) (1 << DEBUG_##x) -static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); - -#define VTD_DPRINTF(what, fmt, ...) do { \ - if (vtd_dbgflags & VTD_DBGBIT(what)) { \ - fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ - ## __VA_ARGS__); } \ - } while (0) -#else -#define VTD_DPRINTF(what, fmt, ...) do {} while (0) -#endif - -static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, - uint64_t wmask, uint64_t w1cmask) -{ - stq_le_p(&s->csr[addr], val); - stq_le_p(&s->wmask[addr], wmask); - stq_le_p(&s->w1cmask[addr], w1cmask); -} - -static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask) -{ - stq_le_p(&s->womask[addr], mask); -} - -static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, - uint32_t wmask, uint32_t w1cmask) -{ - stl_le_p(&s->csr[addr], val); - stl_le_p(&s->wmask[addr], wmask); - stl_le_p(&s->w1cmask[addr], w1cmask); -} - -static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask) -{ - stl_le_p(&s->womask[addr], mask); -} - -/* "External" get/set operations */ -static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) -{ - uint64_t oldval = ldq_le_p(&s->csr[addr]); - uint64_t wmask = ldq_le_p(&s->wmask[addr]); - uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); - stq_le_p(&s->csr[addr], - ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); -} - -static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) -{ - uint32_t oldval = ldl_le_p(&s->csr[addr]); - uint32_t wmask = 
ldl_le_p(&s->wmask[addr]); - uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); - stl_le_p(&s->csr[addr], - ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); -} - -static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr) -{ - uint64_t val = ldq_le_p(&s->csr[addr]); - uint64_t womask = ldq_le_p(&s->womask[addr]); - return val & ~womask; -} - -static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr) -{ - uint32_t val = ldl_le_p(&s->csr[addr]); - uint32_t womask = ldl_le_p(&s->womask[addr]); - return val & ~womask; -} - -/* "Internal" get/set operations */ -static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr) -{ - return ldq_le_p(&s->csr[addr]); -} - -static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr) -{ - return ldl_le_p(&s->csr[addr]); -} - -static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val) -{ - stq_le_p(&s->csr[addr], val); -} - -static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr, - uint32_t clear, uint32_t mask) -{ - uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask; - stl_le_p(&s->csr[addr], new_val); - return new_val; -} - -static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, - uint64_t clear, uint64_t mask) -{ - uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask; - stq_le_p(&s->csr[addr], new_val); - return new_val; -} - -/* GHashTable functions */ -static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) -{ - return *((const uint64_t *)v1) == *((const uint64_t *)v2); -} - -static guint vtd_uint64_hash(gconstpointer v) -{ - return (guint)*(const uint64_t *)v; -} - -static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, - gpointer user_data) -{ - VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; - uint16_t domain_id = *(uint16_t *)user_data; - return entry->domain_id == domain_id; -} - -/* The shift of an addr for a certain level of paging structure */ -static inline uint32_t vtd_slpt_level_shift(uint32_t 
level) -{ - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; -} - -static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) -{ - return ~((1ULL << vtd_slpt_level_shift(level)) - 1); -} - -static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, - gpointer user_data) -{ - VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; - VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; - uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; - uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; - return (entry->domain_id == info->domain_id) && - (((entry->gfn & info->mask) == gfn) || - (entry->gfn == gfn_tlb)); -} - -/* Reset all the gen of VTDAddressSpace to zero and set the gen of - * IntelIOMMUState to 1. - */ -static void vtd_reset_context_cache(IntelIOMMUState *s) -{ - VTDAddressSpace *vtd_as; - VTDBus *vtd_bus; - GHashTableIter bus_it; - uint32_t devfn_it; - - g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); - - VTD_DPRINTF(CACHE, "global context_cache_gen=1"); - while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (!vtd_as) { - continue; - } - vtd_as->context_cache_entry.context_cache_gen = 0; - } - } - s->context_cache_gen = 1; -} - -static void vtd_reset_iotlb(IntelIOMMUState *s) -{ - assert(s->iotlb); - g_hash_table_remove_all(s->iotlb); -} - -static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id, - uint32_t level) -{ - return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | - ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); -} - -static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) -{ - return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; -} - -static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr) -{ - VTDIOTLBEntry *entry; - uint64_t key; - int level; - - for (level = VTD_SL_PT_LEVEL; level < 
VTD_SL_PML4_LEVEL; level++) { - key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), - source_id, level); - entry = g_hash_table_lookup(s->iotlb, &key); - if (entry) { - goto out; - } - } - -out: - return entry; -} - -static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, - uint16_t domain_id, hwaddr addr, uint64_t slpte, - bool read_flags, bool write_flags, - uint32_t level) -{ - VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); - uint64_t *key = g_malloc(sizeof(*key)); - uint64_t gfn = vtd_get_iotlb_gfn(addr, level); - - VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 - " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte, - domain_id); - if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { - VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset"); - vtd_reset_iotlb(s); - } - - entry->gfn = gfn; - entry->domain_id = domain_id; - entry->slpte = slpte; - entry->read_flags = read_flags; - entry->write_flags = write_flags; - entry->mask = vtd_slpt_level_page_mask(level); - *key = vtd_get_iotlb_key(gfn, source_id, level); - g_hash_table_replace(s->iotlb, key, entry); -} - -/* Given the reg addr of both the message data and address, generate an - * interrupt via MSI. - */ -static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg, - hwaddr mesg_data_reg) -{ - hwaddr addr; - uint32_t data; - - assert(mesg_data_reg < DMAR_REG_SIZE); - assert(mesg_addr_reg < DMAR_REG_SIZE); - - addr = vtd_get_long_raw(s, mesg_addr_reg); - data = vtd_get_long_raw(s, mesg_data_reg); - - VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data); - address_space_stl_le(&address_space_memory, addr, data, - MEMTXATTRS_UNSPECIFIED, NULL); -} - -/* Generate a fault event to software via MSI if conditions are met. - * Notice that the value of FSTS_REG being passed to it should be the one - * before any update. 
- */ -static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts) -{ - if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO || - pre_fsts & VTD_FSTS_IQE) { - VTD_DPRINTF(FLOG, "there are previous interrupt conditions " - "to be serviced by software, fault event is not generated " - "(FSTS_REG 0x%"PRIx32 ")", pre_fsts); - return; - } - vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP); - if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) { - VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated"); - } else { - vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); - vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); - } -} - -/* Check if the Fault (F) field of the Fault Recording Register referenced by - * @index is Set. - */ -static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index) -{ - /* Each reg is 128-bit */ - hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); - addr += 8; /* Access the high 64-bit half */ - - assert(index < DMAR_FRCD_REG_NR); - - return vtd_get_quad_raw(s, addr) & VTD_FRCD_F; -} - -/* Update the PPF field of Fault Status Register. - * Should be called whenever change the F field of any fault recording - * registers. - */ -static void vtd_update_fsts_ppf(IntelIOMMUState *s) -{ - uint32_t i; - uint32_t ppf_mask = 0; - - for (i = 0; i < DMAR_FRCD_REG_NR; i++) { - if (vtd_is_frcd_set(s, i)) { - ppf_mask = VTD_FSTS_PPF; - break; - } - } - vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask); - VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 
1 : 0); -} - -static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) -{ - /* Each reg is 128-bit */ - hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); - addr += 8; /* Access the high 64-bit half */ - - assert(index < DMAR_FRCD_REG_NR); - - vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F); - vtd_update_fsts_ppf(s); -} - -/* Must not update F field now, should be done later */ -static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, - uint16_t source_id, hwaddr addr, - VTDFaultReason fault, bool is_write) -{ - uint64_t hi = 0, lo; - hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); - - assert(index < DMAR_FRCD_REG_NR); - - lo = VTD_FRCD_FI(addr); - hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); - if (!is_write) { - hi |= VTD_FRCD_T; - } - vtd_set_quad_raw(s, frcd_reg_addr, lo); - vtd_set_quad_raw(s, frcd_reg_addr + 8, hi); - VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64 - ", lo 0x%"PRIx64, index, hi, lo); -} - -/* Try to collapse multiple pending faults from the same requester */ -static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) -{ - uint32_t i; - uint64_t frcd_reg; - hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */ - - for (i = 0; i < DMAR_FRCD_REG_NR; i++) { - frcd_reg = vtd_get_quad_raw(s, addr); - VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg); - if ((frcd_reg & VTD_FRCD_F) && - ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) { - return true; - } - addr += 16; /* 128-bit for each */ - } - return false; -} - -/* Log and report an DMAR (address translation) fault to software */ -static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr, VTDFaultReason fault, - bool is_write) -{ - uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); - - assert(fault < VTD_FR_MAX); - - if (fault == VTD_FR_RESERVED_ERR) { - /* This is not a normal fault reason case. Drop it. 
*/ - return; - } - VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64 - ", is_write %d", source_id, fault, addr, is_write); - if (fsts_reg & VTD_FSTS_PFO) { - VTD_DPRINTF(FLOG, "new fault is not recorded due to " - "Primary Fault Overflow"); - return; - } - if (vtd_try_collapse_fault(s, source_id)) { - VTD_DPRINTF(FLOG, "new fault is not recorded due to " - "compression of faults"); - return; - } - if (vtd_is_frcd_set(s, s->next_frcd_reg)) { - VTD_DPRINTF(FLOG, "Primary Fault Overflow and " - "new fault is not recorded, set PFO field"); - vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO); - return; - } - - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); - - if (fsts_reg & VTD_FSTS_PPF) { - VTD_DPRINTF(FLOG, "there are pending faults already, " - "fault event is not generated"); - vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); - s->next_frcd_reg++; - if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { - s->next_frcd_reg = 0; - } - } else { - vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK, - VTD_FSTS_FRI(s->next_frcd_reg)); - vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */ - s->next_frcd_reg++; - if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { - s->next_frcd_reg = 0; - } - /* This case actually cause the PPF to be Set. - * So generate fault event (interrupt). - */ - vtd_generate_fault_event(s, fsts_reg); - } -} - -/* Handle Invalidation Queue Errors of queued invalidation interface error - * conditions. 
- */ -static void vtd_handle_inv_queue_error(IntelIOMMUState *s) -{ - uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); - - vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE); - vtd_generate_fault_event(s, fsts_reg); -} - -/* Set the IWC field and try to generate an invalidation completion interrupt */ -static void vtd_generate_completion_event(IntelIOMMUState *s) -{ - VTD_DPRINTF(INV, "completes an invalidation wait command with " - "Interrupt Flag"); - if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) { - VTD_DPRINTF(INV, "there is a previous interrupt condition to be " - "serviced by software, " - "new invalidation event is not generated"); - return; - } - vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC); - vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP); - if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) { - VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation " - "event is not generated"); - return; - } else { - /* Generate the interrupt event */ - vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); - vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); - } -} - -static inline bool vtd_root_entry_present(VTDRootEntry *root) -{ - return root->val & VTD_ROOT_ENTRY_P; -} - -static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, - VTDRootEntry *re) -{ - dma_addr_t addr; - - addr = s->root + index * sizeof(*re); - if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { - VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64 - " + %"PRIu8, s->root, index); - re->val = 0; - return -VTD_FR_ROOT_TABLE_INV; - } - re->val = le64_to_cpu(re->val); - return 0; -} - -static inline bool vtd_context_entry_present(VTDContextEntry *context) -{ - return context->lo & VTD_CONTEXT_ENTRY_P; -} - -static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, - VTDContextEntry *ce) -{ - dma_addr_t addr; - - if (!vtd_root_entry_present(root)) { - 
VTD_DPRINTF(GENERAL, "error: root-entry is not present"); - return -VTD_FR_ROOT_ENTRY_P; - } - addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); - if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { - VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64 - " + %"PRIu8, - (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index); - return -VTD_FR_CONTEXT_TABLE_INV; - } - ce->lo = le64_to_cpu(ce->lo); - ce->hi = le64_to_cpu(ce->hi); - return 0; -} - -static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) -{ - return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; -} - -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) -{ - return slpte & VTD_SL_PT_BASE_ADDR_MASK; -} - -/* Whether the pte indicates the address of the page frame */ -static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) -{ - return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); -} - -/* Get the content of a spte located in @base_addr[@index] */ -static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) -{ - uint64_t slpte; - - assert(index < VTD_SL_PT_ENTRY_NR); - - if (dma_memory_read(&address_space_memory, - base_addr + index * sizeof(slpte), &slpte, - sizeof(slpte))) { - slpte = (uint64_t)-1; - return slpte; - } - slpte = le64_to_cpu(slpte); - return slpte; -} - -/* Given a gpa and the level of paging structure, return the offset of current - * level. 
- */ -static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level) -{ - return (gpa >> vtd_slpt_level_shift(level)) & - ((1ULL << VTD_SL_LEVEL_BITS) - 1); -} - -/* Check Capability Register to see if the @level of page-table is supported */ -static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) -{ - return VTD_CAP_SAGAW_MASK & s->cap & - (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); -} - -/* Get the page-table level that hardware should use for the second-level - * page-table walk from the Address Width field of context-entry. - */ -static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) -{ - return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); -} - -static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) -{ - return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; -} - -static const uint64_t vtd_paging_entry_rsvd_field[] = { - [0] = ~0ULL, - /* For not large page */ - [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - /* For large page */ - [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), -}; - -static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) -{ - if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) { - /* Maybe large page */ - return slpte & vtd_paging_entry_rsvd_field[level + 4]; - } else { - return slpte & vtd_paging_entry_rsvd_field[level]; - } -} - -/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level - * of the translation, can be used for deciding the size of large page. 
- */ -static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write, - uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes) -{ - dma_addr_t addr = vtd_get_slpt_base_from_context(ce); - uint32_t level = vtd_get_level_from_context_entry(ce); - uint32_t offset; - uint64_t slpte; - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); - uint64_t access_right_check; - - /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG - * and AW in context-entry. - */ - if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { - VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa); - return -VTD_FR_ADDR_BEYOND_MGAW; - } - - /* FIXME: what is the Atomics request here? */ - access_right_check = is_write ? VTD_SL_W : VTD_SL_R; - - while (true) { - offset = vtd_gpa_level_offset(gpa, level); - slpte = vtd_get_slpte(addr, offset); - - if (slpte == (uint64_t)-1) { - VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " - "entry at level %"PRIu32 " for gpa 0x%"PRIx64, - level, gpa); - if (level == vtd_get_level_from_context_entry(ce)) { - /* Invalid programming of context-entry */ - return -VTD_FR_CONTEXT_ENTRY_INV; - } else { - return -VTD_FR_PAGING_ENTRY_INV; - } - } - *reads = (*reads) && (slpte & VTD_SL_R); - *writes = (*writes) && (slpte & VTD_SL_W); - if (!(slpte & access_right_check)) { - VTD_DPRINTF(GENERAL, "error: lack of %s permission for " - "gpa 0x%"PRIx64 " slpte 0x%"PRIx64, - (is_write ? "write" : "read"), gpa, slpte); - return is_write ? 
-VTD_FR_WRITE : -VTD_FR_READ; - } - if (vtd_slpte_nonzero_rsvd(slpte, level)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " - "level paging entry level %"PRIu32 " slpte 0x%"PRIx64, - level, slpte); - return -VTD_FR_PAGING_ENTRY_RSVD; - } - - if (vtd_is_last_slpte(slpte, level)) { - *slptep = slpte; - *slpte_level = level; - return 0; - } - addr = vtd_get_slpte_addr(slpte); - level--; - } -} - -/* Map a device to its corresponding domain (context-entry) */ -static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, - uint8_t devfn, VTDContextEntry *ce) -{ - VTDRootEntry re; - int ret_fr; - - ret_fr = vtd_get_root_entry(s, bus_num, &re); - if (ret_fr) { - return ret_fr; - } - - if (!vtd_root_entry_present(&re)) { - VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present", - bus_num); - return -VTD_FR_ROOT_ENTRY_P; - } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry " - "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val); - return -VTD_FR_ROOT_ENTRY_RSVD; - } - - ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce); - if (ret_fr) { - return ret_fr; - } - - if (!vtd_context_entry_present(ce)) { - VTD_DPRINTF(GENERAL, - "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") " - "is not present", devfn, bus_num); - return -VTD_FR_CONTEXT_ENTRY_P; - } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { - VTD_DPRINTF(GENERAL, - "error: non-zero reserved field in context-entry " - "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_RSVD; - } - /* Check if the programming of context-entry is valid */ - if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { - VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in " - "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, - ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_INV; - } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) { - 
VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in " - "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, - ce->hi, ce->lo); - return -VTD_FR_CONTEXT_ENTRY_INV; - } - return 0; -} - -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) -{ - return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); -} - -static const bool vtd_qualified_faults[] = { - [VTD_FR_RESERVED] = false, - [VTD_FR_ROOT_ENTRY_P] = false, - [VTD_FR_CONTEXT_ENTRY_P] = true, - [VTD_FR_CONTEXT_ENTRY_INV] = true, - [VTD_FR_ADDR_BEYOND_MGAW] = true, - [VTD_FR_WRITE] = true, - [VTD_FR_READ] = true, - [VTD_FR_PAGING_ENTRY_INV] = true, - [VTD_FR_ROOT_TABLE_INV] = false, - [VTD_FR_CONTEXT_TABLE_INV] = false, - [VTD_FR_ROOT_ENTRY_RSVD] = false, - [VTD_FR_PAGING_ENTRY_RSVD] = true, - [VTD_FR_CONTEXT_ENTRY_TT] = true, - [VTD_FR_RESERVED_ERR] = false, - [VTD_FR_MAX] = false, -}; - -/* To see if a fault condition is "qualified", which is reported to software - * only if the FPD field in the context-entry used to process the faulting - * request is 0. - */ -static inline bool vtd_is_qualified_fault(VTDFaultReason fault) -{ - return vtd_qualified_faults[fault]; -} - -static inline bool vtd_is_interrupt_addr(hwaddr addr) -{ - return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; -} - -/* Map dev to context-entry then do a paging-structures walk to do a iommu - * translation. - * - * Called from RCU critical section. 
- * - * @bus_num: The bus number - * @devfn: The devfn, which is the combined of device and function number - * @is_write: The access is a write operation - * @entry: IOMMUTLBEntry that contain the addr to be translated and result - */ -static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, - uint8_t devfn, hwaddr addr, bool is_write, - IOMMUTLBEntry *entry) -{ - IntelIOMMUState *s = vtd_as->iommu_state; - VTDContextEntry ce; - uint8_t bus_num = pci_bus_num(bus); - VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry; - uint64_t slpte, page_mask; - uint32_t level; - uint16_t source_id = vtd_make_source_id(bus_num, devfn); - int ret_fr; - bool is_fpd_set = false; - bool reads = true; - bool writes = true; - VTDIOTLBEntry *iotlb_entry; - - /* Check if the request is in interrupt address range */ - if (vtd_is_interrupt_addr(addr)) { - if (is_write) { - /* FIXME: since we don't know the length of the access here, we - * treat Non-DWORD length write requests without PASID as - * interrupt requests, too. Withoud interrupt remapping support, - * we just use 1:1 mapping. 
- */ - VTD_DPRINTF(MMU, "write request to interrupt address " - "gpa 0x%"PRIx64, addr); - entry->iova = addr & VTD_PAGE_MASK_4K; - entry->translated_addr = addr & VTD_PAGE_MASK_4K; - entry->addr_mask = ~VTD_PAGE_MASK_4K; - entry->perm = IOMMU_WO; - return; - } else { - VTD_DPRINTF(GENERAL, "error: read request from interrupt address " - "gpa 0x%"PRIx64, addr); - vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write); - return; - } - } - /* Try to fetch slpte form IOTLB */ - iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); - if (iotlb_entry) { - VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 - " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, - iotlb_entry->slpte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; - reads = iotlb_entry->read_flags; - writes = iotlb_entry->write_flags; - page_mask = iotlb_entry->mask; - goto out; - } - /* Try to fetch context-entry from cache first */ - if (cc_entry->context_cache_gen == s->context_cache_gen) { - VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d " - "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")", - bus_num, devfn, cc_entry->context_entry.hi, - cc_entry->context_entry.lo, cc_entry->context_cache_gen); - ce = cc_entry->context_entry; - is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; - } else { - ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); - is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; - if (ret_fr) { - ret_fr = -ret_fr; - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { - VTD_DPRINTF(FLOG, "fault processing is disabled for DMA " - "requests through this context-entry " - "(with FPD Set)"); - } else { - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); - } - return; - } - /* Update context-cache */ - VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d " - "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")", - bus_num, devfn, ce.hi, ce.lo, - cc_entry->context_cache_gen, s->context_cache_gen); - cc_entry->context_entry = ce; - 
cc_entry->context_cache_gen = s->context_cache_gen; - } - - ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level, - &reads, &writes); - if (ret_fr) { - ret_fr = -ret_fr; - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { - VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests " - "through this context-entry (with FPD Set)"); - } else { - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); - } - return; - } - - page_mask = vtd_slpt_level_page_mask(level); - vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte, - reads, writes, level); -out: - entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; - entry->addr_mask = ~page_mask; - entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0); -} - -static void vtd_root_table_setup(IntelIOMMUState *s) -{ - s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); - s->root_extended = s->root & VTD_RTADDR_RTT; - s->root &= VTD_RTADDR_ADDR_MASK; - - VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root, - (s->root_extended ? "(extended)" : "")); -} - -static void vtd_context_global_invalidate(IntelIOMMUState *s) -{ - s->context_cache_gen++; - if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { - vtd_reset_context_cache(s); - } -} - - -/* Find the VTD address space currently associated with a given bus number, - */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - if (!vtd_bus) { - /* Iterate over the registered buses to find the one - * which currently hold this bus number, and update the bus_num lookup table: - */ - GHashTableIter iter; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - } - return vtd_bus; -} - -/* Do a context-cache device-selective invalidation. 
- * @func_mask: FM field after shifting - */ -static void vtd_context_device_invalidate(IntelIOMMUState *s, - uint16_t source_id, - uint16_t func_mask) -{ - uint16_t mask; - VTDBus *vtd_bus; - VTDAddressSpace *vtd_as; - uint16_t devfn; - uint16_t devfn_it; - - switch (func_mask & 3) { - case 0: - mask = 0; /* No bits in the SID field masked */ - break; - case 1: - mask = 4; /* Mask bit 2 in the SID field */ - break; - case 2: - mask = 6; /* Mask bit 2:1 in the SID field */ - break; - case 3: - mask = 7; /* Mask bit 2:0 in the SID field */ - break; - } - VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16 - " mask %"PRIu16, source_id, mask); - vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); - if (vtd_bus) { - devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { - VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16, - devfn_it); - vtd_as->context_cache_entry.context_cache_gen = 0; - } - } - } -} - -/* Context-cache invalidation - * Returns the Context Actual Invalidation Granularity. 
- * @val: the content of the CCMD_REG - */ -static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) -{ - uint64_t caig; - uint64_t type = val & VTD_CCMD_CIRG_MASK; - - switch (type) { - case VTD_CCMD_DOMAIN_INVL: - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - (uint16_t)VTD_CCMD_DID(val)); - /* Fall through */ - case VTD_CCMD_GLOBAL_INVL: - VTD_DPRINTF(INV, "global invalidation"); - caig = VTD_CCMD_GLOBAL_INVL_A; - vtd_context_global_invalidate(s); - break; - - case VTD_CCMD_DEVICE_INVL: - caig = VTD_CCMD_DEVICE_INVL_A; - vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val)); - break; - - default: - VTD_DPRINTF(GENERAL, "error: invalid granularity"); - caig = 0; - } - return caig; -} - -static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) -{ - vtd_reset_iotlb(s); -} - -static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) -{ - g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, - &domain_id); -} - -static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, - hwaddr addr, uint8_t am) -{ - VTDIOTLBPageInvInfo info; - - assert(am <= VTD_MAMV); - info.domain_id = domain_id; - info.addr = addr; - info.mask = ~((1 << am) - 1); - g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); -} - -/* Flush IOTLB - * Returns the IOTLB Actual Invalidation Granularity. 
- * @val: the content of the IOTLB_REG - */ -static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) -{ - uint64_t iaig; - uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK; - uint16_t domain_id; - hwaddr addr; - uint8_t am; - - switch (type) { - case VTD_TLB_GLOBAL_FLUSH: - VTD_DPRINTF(INV, "global invalidation"); - iaig = VTD_TLB_GLOBAL_FLUSH_A; - vtd_iotlb_global_invalidate(s); - break; - - case VTD_TLB_DSI_FLUSH: - domain_id = VTD_TLB_DID(val); - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - domain_id); - iaig = VTD_TLB_DSI_FLUSH_A; - vtd_iotlb_domain_invalidate(s, domain_id); - break; - - case VTD_TLB_PSI_FLUSH: - domain_id = VTD_TLB_DID(val); - addr = vtd_get_quad_raw(s, DMAR_IVA_REG); - am = VTD_IVA_AM(addr); - addr = VTD_IVA_ADDR(addr); - VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 - " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); - if (am > VTD_MAMV) { - VTD_DPRINTF(GENERAL, "error: supported max address mask value is " - "%"PRIu8, (uint8_t)VTD_MAMV); - iaig = 0; - break; - } - iaig = VTD_TLB_PSI_FLUSH_A; - vtd_iotlb_page_invalidate(s, domain_id, addr, am); - break; - - default: - VTD_DPRINTF(GENERAL, "error: invalid granularity"); - iaig = 0; - } - return iaig; -} - -static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s) -{ - return s->iq_tail == 0; -} - -static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s) -{ - return s->qi_enabled && (s->iq_tail == s->iq_head) && - (s->iq_last_desc_type == VTD_INV_DESC_WAIT); -} - -static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) -{ - uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); - - VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? 
"on" : "off")); - if (en) { - if (vtd_queued_inv_enable_check(s)) { - s->iq = iqa_val & VTD_IQA_IQA_MASK; - /* 2^(x+8) entries */ - s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); - s->qi_enabled = true; - VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); - VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", - s->iq, s->iq_size); - /* Ok - report back to driver */ - vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); - } else { - VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " - "tail %"PRIu16, s->iq_tail); - } - } else { - if (vtd_queued_inv_disable_check(s)) { - /* disable Queued Invalidation */ - vtd_set_quad_raw(s, DMAR_IQH_REG, 0); - s->iq_head = 0; - s->qi_enabled = false; - /* Ok - report back to driver */ - vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); - } else { - VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " - "head %"PRIu16 ", tail %"PRIu16 - ", last_descriptor %"PRIu8, - s->iq_head, s->iq_tail, s->iq_last_desc_type); - } - } -} - -/* Set Root Table Pointer */ -static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) -{ - VTD_DPRINTF(CSR, "set Root Table Pointer"); - - vtd_root_table_setup(s); - /* Ok - report back to driver */ - vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); -} - -/* Handle Translation Enable/Disable */ -static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) -{ - VTD_DPRINTF(CSR, "Translation Enable %s", (en ? 
"on" : "off")); - - if (en) { - s->dmar_enabled = true; - /* Ok - report back to driver */ - vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES); - } else { - s->dmar_enabled = false; - - /* Clear the index of Fault Recording Register */ - s->next_frcd_reg = 0; - /* Ok - report back to driver */ - vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); - } -} - -/* Handle write to Global Command Register */ -static void vtd_handle_gcmd_write(IntelIOMMUState *s) -{ - uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); - uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); - uint32_t changed = status ^ val; - - VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); - if (changed & VTD_GCMD_TE) { - /* Translation enable/disable */ - vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); - } - if (val & VTD_GCMD_SRTP) { - /* Set/update the root-table pointer */ - vtd_handle_gcmd_srtp(s); - } - if (changed & VTD_GCMD_QIE) { - /* Queued Invalidation Enable */ - vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); - } -} - -/* Handle write to Context Command Register */ -static void vtd_handle_ccmd_write(IntelIOMMUState *s) -{ - uint64_t ret; - uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG); - - /* Context-cache invalidation request */ - if (val & VTD_CCMD_ICC) { - if (s->qi_enabled) { - VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " - "should not use register-based invalidation"); - return; - } - ret = vtd_context_cache_invalidate(s, val); - /* Invalidation completed. 
Change something to show */ - vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); - ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, - ret); - VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); - } -} - -/* Handle write to IOTLB Invalidation Register */ -static void vtd_handle_iotlb_write(IntelIOMMUState *s) -{ - uint64_t ret; - uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG); - - /* IOTLB invalidation request */ - if (val & VTD_TLB_IVT) { - if (s->qi_enabled) { - VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " - "should not use register-based invalidation"); - return; - } - ret = vtd_iotlb_flush(s, val); - /* Invalidation completed. Change something to show */ - vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); - ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, - VTD_TLB_FLUSH_GRANU_MASK_A, ret); - VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); - } -} - -/* Fetch an Invalidation Descriptor from the Invalidation Queue */ -static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset, - VTDInvDesc *inv_desc) -{ - dma_addr_t addr = base_addr + offset * sizeof(*inv_desc); - if (dma_memory_read(&address_space_memory, addr, inv_desc, - sizeof(*inv_desc))) { - VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor " - "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset); - inv_desc->lo = 0; - inv_desc->hi = 0; - - return false; - } - inv_desc->lo = le64_to_cpu(inv_desc->lo); - inv_desc->hi = le64_to_cpu(inv_desc->hi); - return true; -} - -static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) -{ - if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || - (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation " - "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, - inv_desc->hi, inv_desc->lo); - return false; - } - if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { - /* Status Write */ - uint32_t 
status_data = (uint32_t)(inv_desc->lo >> - VTD_INV_DESC_WAIT_DATA_SHIFT); - - assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); - - /* FIXME: need to be masked with HAW? */ - dma_addr_t status_addr = inv_desc->hi; - VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64, - status_data, status_addr); - status_data = cpu_to_le32(status_data); - if (dma_memory_write(&address_space_memory, status_addr, &status_data, - sizeof(status_data))) { - VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write"); - return false; - } - } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { - /* Interrupt flag */ - VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion"); - vtd_generate_completion_event(s); - } else { - VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: " - "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo); - return false; - } - return true; -} - -static bool vtd_process_context_cache_desc(IntelIOMMUState *s, - VTDInvDesc *inv_desc) -{ - if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache " - "Invalidate Descriptor"); - return false; - } - switch (inv_desc->lo & VTD_INV_DESC_CC_G) { - case VTD_INV_DESC_CC_DOMAIN: - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo)); - /* Fall through */ - case VTD_INV_DESC_CC_GLOBAL: - VTD_DPRINTF(INV, "global invalidation"); - vtd_context_global_invalidate(s); - break; - - case VTD_INV_DESC_CC_DEVICE: - vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo), - VTD_INV_DESC_CC_FM(inv_desc->lo)); - break; - - default: - VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache " - "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, - inv_desc->hi, inv_desc->lo); - return false; - } - return true; -} - -static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) -{ - uint16_t domain_id; - uint8_t am; - hwaddr 
addr; - - if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || - (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { - VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB " - "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, - inv_desc->hi, inv_desc->lo); - return false; - } - - switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) { - case VTD_INV_DESC_IOTLB_GLOBAL: - VTD_DPRINTF(INV, "global invalidation"); - vtd_iotlb_global_invalidate(s); - break; - - case VTD_INV_DESC_IOTLB_DOMAIN: - domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); - VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, - domain_id); - vtd_iotlb_domain_invalidate(s, domain_id); - break; - - case VTD_INV_DESC_IOTLB_PAGE: - domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); - addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); - am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); - VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 - " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); - if (am > VTD_MAMV) { - VTD_DPRINTF(GENERAL, "error: supported max address mask value is " - "%"PRIu8, (uint8_t)VTD_MAMV); - return false; - } - vtd_iotlb_page_invalidate(s, domain_id, addr, am); - break; - - default: - VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate " - "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, - inv_desc->hi, inv_desc->lo); - return false; - } - return true; -} - -static bool vtd_process_inv_desc(IntelIOMMUState *s) -{ - VTDInvDesc inv_desc; - uint8_t desc_type; - - VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head); - if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) { - s->iq_last_desc_type = VTD_INV_DESC_NONE; - return false; - } - desc_type = inv_desc.lo & VTD_INV_DESC_TYPE; - /* FIXME: should update at first or at last? 
*/ - s->iq_last_desc_type = desc_type; - - switch (desc_type) { - case VTD_INV_DESC_CC: - VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64 - " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); - if (!vtd_process_context_cache_desc(s, &inv_desc)) { - return false; - } - break; - - case VTD_INV_DESC_IOTLB: - VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64 - " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); - if (!vtd_process_iotlb_desc(s, &inv_desc)) { - return false; - } - break; - - case VTD_INV_DESC_WAIT: - VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64 - " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); - if (!vtd_process_wait_desc(s, &inv_desc)) { - return false; - } - break; - - default: - VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " - "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, - inv_desc.hi, inv_desc.lo, desc_type); - return false; - } - s->iq_head++; - if (s->iq_head == s->iq_size) { - s->iq_head = 0; - } - return true; -} - -/* Try to fetch and process more Invalidation Descriptors */ -static void vtd_fetch_inv_desc(IntelIOMMUState *s) -{ - VTD_DPRINTF(INV, "fetch Invalidation Descriptors"); - if (s->iq_tail >= s->iq_size) { - /* Detects an invalid Tail pointer */ - VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16 - " while iq_size is %"PRIu16, s->iq_tail, s->iq_size); - vtd_handle_inv_queue_error(s); - return; - } - while (s->iq_head != s->iq_tail) { - if (!vtd_process_inv_desc(s)) { - /* Invalidation Queue Errors */ - vtd_handle_inv_queue_error(s); - break; - } - /* Must update the IQH_REG in time */ - vtd_set_quad_raw(s, DMAR_IQH_REG, - (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) & - VTD_IQH_QH_MASK); - } -} - -/* Handle write to Invalidation Queue Tail Register */ -static void vtd_handle_iqt_write(IntelIOMMUState *s) -{ - uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG); - - s->iq_tail = VTD_IQT_QT(val); - VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail); - if (s->qi_enabled && 
!(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) { - /* Process Invalidation Queue here */ - vtd_fetch_inv_desc(s); - } -} - -static void vtd_handle_fsts_write(IntelIOMMUState *s) -{ - uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); - uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); - uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE; - - if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { - vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); - VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " - "IP field of FECTL_REG"); - } - /* FIXME: when IQE is Clear, should we try to fetch some Invalidation - * Descriptors if there are any when Queued Invalidation is enabled? - */ -} - -static void vtd_handle_fectl_write(IntelIOMMUState *s) -{ - uint32_t fectl_reg; - /* FIXME: when software clears the IM field, check the IP field. But do we - * need to compare the old value and the new value to conclude that - * software clears the IM field? Or just check if the IM field is zero? - */ - fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); - if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { - vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); - vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); - VTD_DPRINTF(FLOG, "IM field is cleared, generate " - "fault event interrupt"); - } -} - -static void vtd_handle_ics_write(IntelIOMMUState *s) -{ - uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG); - uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); - - if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) { - vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); - VTD_DPRINTF(INV, "pending completion interrupt condition serviced, " - "clear IP field of IECTL_REG"); - } -} - -static void vtd_handle_iectl_write(IntelIOMMUState *s) -{ - uint32_t iectl_reg; - /* FIXME: when software clears the IM field, check the IP field. 
But do we - * need to compare the old value and the new value to conclude that - * software clears the IM field? Or just check if the IM field is zero? - */ - iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); - if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) { - vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); - vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); - VTD_DPRINTF(INV, "IM field is cleared, generate " - "invalidation event interrupt"); - } -} - -static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) -{ - IntelIOMMUState *s = opaque; - uint64_t val; - - if (addr + size > DMAR_REG_SIZE) { - VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 - ", got 0x%"PRIx64 " %d", - (uint64_t)DMAR_REG_SIZE, addr, size); - return (uint64_t)-1; - } - - switch (addr) { - /* Root Table Address Register, 64-bit */ - case DMAR_RTADDR_REG: - if (size == 4) { - val = s->root & ((1ULL << 32) - 1); - } else { - val = s->root; - } - break; - - case DMAR_RTADDR_REG_HI: - assert(size == 4); - val = s->root >> 32; - break; - - /* Invalidation Queue Address Register, 64-bit */ - case DMAR_IQA_REG: - val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS); - if (size == 4) { - val = val & ((1ULL << 32) - 1); - } - break; - - case DMAR_IQA_REG_HI: - assert(size == 4); - val = s->iq >> 32; - break; - - default: - if (size == 4) { - val = vtd_get_long(s, addr); - } else { - val = vtd_get_quad(s, addr); - } - } - VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, - addr, size, val); - return val; -} - -static void vtd_mem_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - IntelIOMMUState *s = opaque; - - if (addr + size > DMAR_REG_SIZE) { - VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 - ", got 0x%"PRIx64 " %d", - (uint64_t)DMAR_REG_SIZE, addr, size); - return; - } - - switch (addr) { - /* Global Command Register, 32-bit */ - case DMAR_GCMD_REG: - VTD_DPRINTF(CSR, 
"DMAR_GCMD_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - vtd_set_long(s, addr, val); - vtd_handle_gcmd_write(s); - break; - - /* Context Command Register, 64-bit */ - case DMAR_CCMD_REG: - VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - vtd_handle_ccmd_write(s); - } - break; - - case DMAR_CCMD_REG_HI: - VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_ccmd_write(s); - break; - - /* IOTLB Invalidation Register, 64-bit */ - case DMAR_IOTLB_REG: - VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - vtd_handle_iotlb_write(s); - } - break; - - case DMAR_IOTLB_REG_HI: - VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_iotlb_write(s); - break; - - /* Invalidate Address Register, 64-bit */ - case DMAR_IVA_REG: - VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - break; - - case DMAR_IVA_REG_HI: - VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Fault Status Register, 32-bit */ - case DMAR_FSTS_REG: - VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_fsts_write(s); - break; - - /* Fault Event Control Register, 32-bit */ - case DMAR_FECTL_REG: - 
VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_fectl_write(s); - break; - - /* Fault Event Data Register, 32-bit */ - case DMAR_FEDATA_REG: - VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Fault Event Address Register, 32-bit */ - case DMAR_FEADDR_REG: - VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Fault Event Upper Address Register, 32-bit */ - case DMAR_FEUADDR_REG: - VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Protected Memory Enable Register, 32-bit */ - case DMAR_PMEN_REG: - VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Root Table Address Register, 64-bit */ - case DMAR_RTADDR_REG: - VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - break; - - case DMAR_RTADDR_REG_HI: - VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Invalidation Queue Tail Register, 64-bit */ - case DMAR_IQT_REG: - VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - vtd_handle_iqt_write(s); - break; - - case DMAR_IQT_REG_HI: - VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write 
addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - /* 19:63 of IQT_REG is RsvdZ, do nothing here */ - break; - - /* Invalidation Queue Address Register, 64-bit */ - case DMAR_IQA_REG: - VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - break; - - case DMAR_IQA_REG_HI: - VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Invalidation Completion Status Register, 32-bit */ - case DMAR_ICS_REG: - VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_ics_write(s); - break; - - /* Invalidation Event Control Register, 32-bit */ - case DMAR_IECTL_REG: - VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - vtd_handle_iectl_write(s); - break; - - /* Invalidation Event Data Register, 32-bit */ - case DMAR_IEDATA_REG: - VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Invalidation Event Address Register, 32-bit */ - case DMAR_IEADDR_REG: - VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Invalidation Event Upper Address Register, 32-bit */ - case DMAR_IEUADDR_REG: - VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - /* Fault Recording Registers, 128-bit */ - case DMAR_FRCD_REG_0_0: - 
VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - break; - - case DMAR_FRCD_REG_0_1: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - break; - - case DMAR_FRCD_REG_0_2: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - /* May clear bit 127 (Fault), update PPF */ - vtd_update_fsts_ppf(s); - } - break; - - case DMAR_FRCD_REG_0_3: - VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - assert(size == 4); - vtd_set_long(s, addr, val); - /* May clear bit 127 (Fault), update PPF */ - vtd_update_fsts_ppf(s); - break; - - default: - VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 - ", size %d, val 0x%"PRIx64, addr, size, val); - if (size == 4) { - vtd_set_long(s, addr, val); - } else { - vtd_set_quad(s, addr, val); - } - } -} - -static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, - bool is_write) -{ - VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); - IntelIOMMUState *s = vtd_as->iommu_state; - IOMMUTLBEntry ret = { - .target_as = &address_space_memory, - .iova = addr, - .translated_addr = 0, - .addr_mask = ~(hwaddr)0, - .perm = IOMMU_NONE, - }; - - if (!s->dmar_enabled) { - /* DMAR disabled, passthrough, use 4k-page*/ - ret.iova = addr & VTD_PAGE_MASK_4K; - ret.translated_addr = addr & VTD_PAGE_MASK_4K; - ret.addr_mask = ~VTD_PAGE_MASK_4K; - ret.perm = IOMMU_RW; - return ret; - } - - vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, - is_write, &ret); - VTD_DPRINTF(MMU, - "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 - " 
gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus), - VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), - vtd_as->devfn, addr, ret.translated_addr); - return ret; -} - -static const VMStateDescription vtd_vmstate = { - .name = "iommu-intel", - .unmigratable = 1, -}; - -static const MemoryRegionOps vtd_mem_ops = { - .read = vtd_mem_read, - .write = vtd_mem_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 4, - .max_access_size = 8, - }, - .valid = { - .min_access_size = 4, - .max_access_size = 8, - }, -}; - -static Property vtd_properties[] = { - DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), - DEFINE_PROP_END_OF_LIST(), -}; - - -VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) -{ - uintptr_t key = (uintptr_t)bus; - VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key); - VTDAddressSpace *vtd_dev_as; - - if (!vtd_bus) { - /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX); - vtd_bus->bus = bus; - key = (uintptr_t)bus; - g_hash_table_insert(s->vtd_as_by_busptr, &key, vtd_bus); - } - - vtd_dev_as = vtd_bus->dev_as[devfn]; - - if (!vtd_dev_as) { - vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace)); - - vtd_dev_as->bus = bus; - vtd_dev_as->devfn = (uint8_t)devfn; - vtd_dev_as->iommu_state = s; - vtd_dev_as->context_cache_entry.context_cache_gen = 0; - memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), - &s->iommu_ops, "intel_iommu", UINT64_MAX); - address_space_init(&vtd_dev_as->as, - &vtd_dev_as->iommu, "intel_iommu"); - } - return vtd_dev_as; -} - -/* Do the initialization. It will also be called when reset, so pay - * attention when adding new initialization stuff. 
- */ -static void vtd_init(IntelIOMMUState *s) -{ - memset(s->csr, 0, DMAR_REG_SIZE); - memset(s->wmask, 0, DMAR_REG_SIZE); - memset(s->w1cmask, 0, DMAR_REG_SIZE); - memset(s->womask, 0, DMAR_REG_SIZE); - - s->iommu_ops.translate = vtd_iommu_translate; - s->root = 0; - s->root_extended = false; - s->dmar_enabled = false; - s->iq_head = 0; - s->iq_tail = 0; - s->iq = 0; - s->iq_size = 0; - s->qi_enabled = false; - s->iq_last_desc_type = VTD_INV_DESC_NONE; - s->next_frcd_reg = 0; - s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; - s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; - - vtd_reset_context_cache(s); - vtd_reset_iotlb(s); - - /* Define registers with default values and bit semantics */ - vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); - vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); - vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); - vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); - vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0); - vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0); - vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL); - - /* Advanced Fault Logging not supported */ - vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL); - vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0); - vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0); - vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0); - - /* Treated as RsvdZ when EIM in ECAP_REG is not supported - * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0); - */ - vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0); - - /* Treated as RO for implementations that PLMR and PHMR fields reported - * as Clear in the CAP_REG. 
- * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0); - */ - vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0); - - vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0); - vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0); - vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0); - vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL); - vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0); - vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0); - vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0); - /* Treadted as RsvdZ when EIM in ECAP_REG is not supported */ - vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0); - - /* IOTLB registers */ - vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0Xb003ffff00000000ULL, 0); - vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0); - vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL); - - /* Fault Recording Registers, 128-bit */ - vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); - vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); -} - -/* Should not reset address_spaces when reset because devices will still use - * the address space they got at first (won't ask the bus again). 
- */ -static void vtd_reset(DeviceState *dev) -{ - IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - - VTD_DPRINTF(GENERAL, ""); - vtd_init(s); -} - -static void vtd_realize(DeviceState *dev, Error **errp) -{ - IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - - VTD_DPRINTF(GENERAL, ""); - memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); - memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, - "intel_iommu", DMAR_REG_SIZE); - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); - /* No corresponding destroy */ - s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, - g_free, g_free); - s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, - g_free, g_free); - vtd_init(s); -} - -static void vtd_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = vtd_reset; - dc->realize = vtd_realize; - dc->vmsd = &vtd_vmstate; - dc->props = vtd_properties; -} - -static const TypeInfo vtd_info = { - .name = TYPE_INTEL_IOMMU_DEVICE, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(IntelIOMMUState), - .class_init = vtd_class_init, -}; - -static void vtd_register_types(void) -{ - VTD_DPRINTF(GENERAL, ""); - type_register_static(&vtd_info); -} - -type_init(vtd_register_types) diff --git a/qemu/hw/i386/intel_iommu_internal.h b/qemu/hw/i386/intel_iommu_internal.h deleted file mode 100644 index e5f514c6e..000000000 --- a/qemu/hw/i386/intel_iommu_internal.h +++ /dev/null @@ -1,391 +0,0 @@ -/* - * QEMU emulation of an Intel IOMMU (VT-d) - * (DMA Remapping device) - * - * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> - * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - - * You should have received a copy of the GNU General Public License along - * with this program; if not, see <http://www.gnu.org/licenses/>. - * - * Lots of defines copied from kernel/include/linux/intel-iommu.h: - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj <ashok.raj@intel.com> - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - * - */ - -#ifndef HW_I386_INTEL_IOMMU_INTERNAL_H -#define HW_I386_INTEL_IOMMU_INTERNAL_H -#include "hw/i386/intel_iommu.h" - -/* - * Intel IOMMU register specification - */ -#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ -#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ -#define DMAR_CAP_REG_HI 0xc /* High 32-bit of DMAR_CAP_REG */ -#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ -#define DMAR_ECAP_REG_HI 0X14 -#define DMAR_GCMD_REG 0x18 /* Global command */ -#define DMAR_GSTS_REG 0x1c /* Global status */ -#define DMAR_RTADDR_REG 0x20 /* Root entry table */ -#define DMAR_RTADDR_REG_HI 0X24 -#define DMAR_CCMD_REG 0x28 /* Context command */ -#define DMAR_CCMD_REG_HI 0x2c -#define DMAR_FSTS_REG 0x34 /* Fault status */ -#define DMAR_FECTL_REG 0x38 /* Fault control */ -#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data */ -#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr */ -#define DMAR_FEUADDR_REG 0x44 /* Upper address */ -#define DMAR_AFLOG_REG 0x58 /* Advanced fault control */ -#define DMAR_AFLOG_REG_HI 0X5c -#define DMAR_PMEN_REG 0x64 /* Enable protected memory region */ -#define DMAR_PLMBASE_REG 0x68 /* PMRR low addr */ -#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ -#define DMAR_PHMBASE_REG 0x70 /* PMRR high base addr */ -#define DMAR_PHMBASE_REG_HI 0X74 -#define 
DMAR_PHMLIMIT_REG 0x78 /* PMRR high limit */ -#define DMAR_PHMLIMIT_REG_HI 0x7c -#define DMAR_IQH_REG 0x80 /* Invalidation queue head */ -#define DMAR_IQH_REG_HI 0X84 -#define DMAR_IQT_REG 0x88 /* Invalidation queue tail */ -#define DMAR_IQT_REG_HI 0X8c -#define DMAR_IQA_REG 0x90 /* Invalidation queue addr */ -#define DMAR_IQA_REG_HI 0x94 -#define DMAR_ICS_REG 0x9c /* Invalidation complete status */ -#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr */ -#define DMAR_IRTA_REG_HI 0xbc -#define DMAR_IECTL_REG 0xa0 /* Invalidation event control */ -#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data */ -#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address */ -#define DMAR_IEUADDR_REG 0xac /* Invalidation event address */ -#define DMAR_PQH_REG 0xc0 /* Page request queue head */ -#define DMAR_PQH_REG_HI 0xc4 -#define DMAR_PQT_REG 0xc8 /* Page request queue tail*/ -#define DMAR_PQT_REG_HI 0xcc -#define DMAR_PQA_REG 0xd0 /* Page request queue address */ -#define DMAR_PQA_REG_HI 0xd4 -#define DMAR_PRS_REG 0xdc /* Page request status */ -#define DMAR_PECTL_REG 0xe0 /* Page request event control */ -#define DMAR_PEDATA_REG 0xe4 /* Page request event data */ -#define DMAR_PEADDR_REG 0xe8 /* Page request event address */ -#define DMAR_PEUADDR_REG 0xec /* Page event upper address */ -#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability */ -#define DMAR_MTRRCAP_REG_HI 0x104 -#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type */ -#define DMAR_MTRRDEF_REG_HI 0x10c - -/* IOTLB registers */ -#define DMAR_IOTLB_REG_OFFSET 0xf0 /* Offset to the IOTLB registers */ -#define DMAR_IVA_REG DMAR_IOTLB_REG_OFFSET /* Invalidate address */ -#define DMAR_IVA_REG_HI (DMAR_IVA_REG + 4) -/* IOTLB invalidate register */ -#define DMAR_IOTLB_REG (DMAR_IOTLB_REG_OFFSET + 0x8) -#define DMAR_IOTLB_REG_HI (DMAR_IOTLB_REG + 4) - -/* FRCD */ -#define DMAR_FRCD_REG_OFFSET 0x220 /* Offset to the fault recording regs */ -/* NOTICE: If you change the DMAR_FRCD_REG_NR, please remember to 
change the - * DMAR_REG_SIZE in include/hw/i386/intel_iommu.h. - * #define DMAR_REG_SIZE (DMAR_FRCD_REG_OFFSET + 16 * DMAR_FRCD_REG_NR) - */ -#define DMAR_FRCD_REG_NR 1ULL /* Num of fault recording regs */ - -#define DMAR_FRCD_REG_0_0 0x220 /* The 0th fault recording regs */ -#define DMAR_FRCD_REG_0_1 0x224 -#define DMAR_FRCD_REG_0_2 0x228 -#define DMAR_FRCD_REG_0_3 0x22c - -/* Interrupt Address Range */ -#define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL -#define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL - -/* The shift of source_id in the key of IOTLB hash table */ -#define VTD_IOTLB_SID_SHIFT 36 -#define VTD_IOTLB_LVL_SHIFT 44 -#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ - -/* IOTLB_REG */ -#define VTD_TLB_GLOBAL_FLUSH (1ULL << 60) /* Global invalidation */ -#define VTD_TLB_DSI_FLUSH (2ULL << 60) /* Domain-selective */ -#define VTD_TLB_PSI_FLUSH (3ULL << 60) /* Page-selective */ -#define VTD_TLB_FLUSH_GRANU_MASK (3ULL << 60) -#define VTD_TLB_GLOBAL_FLUSH_A (1ULL << 57) -#define VTD_TLB_DSI_FLUSH_A (2ULL << 57) -#define VTD_TLB_PSI_FLUSH_A (3ULL << 57) -#define VTD_TLB_FLUSH_GRANU_MASK_A (3ULL << 57) -#define VTD_TLB_IVT (1ULL << 63) -#define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK) - -/* IVA_REG */ -#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1)) -#define VTD_IVA_AM(val) ((val) & 0x3fULL) - -/* GCMD_REG */ -#define VTD_GCMD_TE (1UL << 31) -#define VTD_GCMD_SRTP (1UL << 30) -#define VTD_GCMD_SFL (1UL << 29) -#define VTD_GCMD_EAFL (1UL << 28) -#define VTD_GCMD_WBF (1UL << 27) -#define VTD_GCMD_QIE (1UL << 26) -#define VTD_GCMD_IRE (1UL << 25) -#define VTD_GCMD_SIRTP (1UL << 24) -#define VTD_GCMD_CFI (1UL << 23) - -/* GSTS_REG */ -#define VTD_GSTS_TES (1UL << 31) -#define VTD_GSTS_RTPS (1UL << 30) -#define VTD_GSTS_FLS (1UL << 29) -#define VTD_GSTS_AFLS (1UL << 28) -#define VTD_GSTS_WBFS (1UL << 27) -#define VTD_GSTS_QIES (1UL << 26) -#define VTD_GSTS_IRES (1UL << 25) -#define VTD_GSTS_IRTPS (1UL << 24) -#define 
VTD_GSTS_CFIS (1UL << 23) - -/* CCMD_REG */ -#define VTD_CCMD_ICC (1ULL << 63) -#define VTD_CCMD_GLOBAL_INVL (1ULL << 61) -#define VTD_CCMD_DOMAIN_INVL (2ULL << 61) -#define VTD_CCMD_DEVICE_INVL (3ULL << 61) -#define VTD_CCMD_CIRG_MASK (3ULL << 61) -#define VTD_CCMD_GLOBAL_INVL_A (1ULL << 59) -#define VTD_CCMD_DOMAIN_INVL_A (2ULL << 59) -#define VTD_CCMD_DEVICE_INVL_A (3ULL << 59) -#define VTD_CCMD_CAIG_MASK (3ULL << 59) -#define VTD_CCMD_DID(val) ((val) & VTD_DOMAIN_ID_MASK) -#define VTD_CCMD_SID(val) (((val) >> 16) & 0xffffULL) -#define VTD_CCMD_FM(val) (((val) >> 32) & 3ULL) - -/* RTADDR_REG */ -#define VTD_RTADDR_RTT (1ULL << 11) -#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) - -/* ECAP_REG */ -/* (offset >> 4) << 8 */ -#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) -#define VTD_ECAP_QI (1ULL << 1) - -/* CAP_REG */ -/* (offset >> 4) << 24 */ -#define VTD_CAP_FRO (DMAR_FRCD_REG_OFFSET << 20) -#define VTD_CAP_NFR ((DMAR_FRCD_REG_NR - 1) << 40) -#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ -#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) -#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) -#define VTD_MGAW 39 /* Maximum Guest Address Width */ -#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) -#define VTD_MAMV 18ULL -#define VTD_CAP_MAMV (VTD_MAMV << 48) -#define VTD_CAP_PSI (1ULL << 39) -#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) - -/* Supported Adjusted Guest Address Widths */ -#define VTD_CAP_SAGAW_SHIFT 8 -#define VTD_CAP_SAGAW_MASK (0x1fULL << VTD_CAP_SAGAW_SHIFT) - /* 39-bit AGAW, 3-level page-table */ -#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) - /* 48-bit AGAW, 4-level page-table */ -#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) -#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit - -/* IQT_REG */ -#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) - -/* IQA_REG */ -#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) -#define VTD_IQA_QS 0x7ULL - -/* IQH_REG */ 
-#define VTD_IQH_QH_SHIFT 4 -#define VTD_IQH_QH_MASK 0x7fff0ULL - -/* ICS_REG */ -#define VTD_ICS_IWC 1UL - -/* IECTL_REG */ -#define VTD_IECTL_IM (1UL << 31) -#define VTD_IECTL_IP (1UL << 30) - -/* FSTS_REG */ -#define VTD_FSTS_FRI_MASK 0xff00UL -#define VTD_FSTS_FRI(val) ((((uint32_t)(val)) << 8) & VTD_FSTS_FRI_MASK) -#define VTD_FSTS_IQE (1UL << 4) -#define VTD_FSTS_PPF (1UL << 1) -#define VTD_FSTS_PFO 1UL - -/* FECTL_REG */ -#define VTD_FECTL_IM (1UL << 31) -#define VTD_FECTL_IP (1UL << 30) - -/* Fault Recording Register */ -/* For the high 64-bit of 128-bit */ -#define VTD_FRCD_F (1ULL << 63) -#define VTD_FRCD_T (1ULL << 62) -#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32) -#define VTD_FRCD_SID_MASK 0xffffULL -#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) -/* For the low 64-bit of 128-bit */ -#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) - -/* DMA Remapping Fault Conditions */ -typedef enum VTDFaultReason { - VTD_FR_RESERVED = 0, /* Reserved for Advanced Fault logging */ - VTD_FR_ROOT_ENTRY_P = 1, /* The Present(P) field of root-entry is 0 */ - VTD_FR_CONTEXT_ENTRY_P, /* The Present(P) field of context-entry is 0 */ - VTD_FR_CONTEXT_ENTRY_INV, /* Invalid programming of a context-entry */ - VTD_FR_ADDR_BEYOND_MGAW, /* Input-address above (2^x-1) */ - VTD_FR_WRITE, /* No write permission */ - VTD_FR_READ, /* No read permission */ - /* Fail to access a second-level paging entry (not SL_PML4E) */ - VTD_FR_PAGING_ENTRY_INV, - VTD_FR_ROOT_TABLE_INV, /* Fail to access a root-entry */ - VTD_FR_CONTEXT_TABLE_INV, /* Fail to access a context-entry */ - /* Non-zero reserved field in a present root-entry */ - VTD_FR_ROOT_ENTRY_RSVD, - /* Non-zero reserved field in a present context-entry */ - VTD_FR_CONTEXT_ENTRY_RSVD, - /* Non-zero reserved field in a second-level paging entry with at lease one - * Read(R) and Write(W) or Execute(E) field is Set. 
- */ - VTD_FR_PAGING_ENTRY_RSVD, - /* Translation request or translated request explicitly blocked dut to the - * programming of the Translation Type (T) field in the present - * context-entry. - */ - VTD_FR_CONTEXT_ENTRY_TT, - /* This is not a normal fault reason. We use this to indicate some faults - * that are not referenced by the VT-d specification. - * Fault event with such reason should not be recorded. - */ - VTD_FR_RESERVED_ERR, - VTD_FR_MAX, /* Guard */ -} VTDFaultReason; - -#define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL - -/* Queued Invalidation Descriptor */ -struct VTDInvDesc { - uint64_t lo; - uint64_t hi; -}; -typedef struct VTDInvDesc VTDInvDesc; - -/* Masks for struct VTDInvDesc */ -#define VTD_INV_DESC_TYPE 0xf -#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ -#define VTD_INV_DESC_IOTLB 0x2 -#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ -#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ - -/* Masks for Invalidation Wait Descriptor*/ -#define VTD_INV_DESC_WAIT_SW (1ULL << 5) -#define VTD_INV_DESC_WAIT_IF (1ULL << 4) -#define VTD_INV_DESC_WAIT_FN (1ULL << 6) -#define VTD_INV_DESC_WAIT_DATA_SHIFT 32 -#define VTD_INV_DESC_WAIT_RSVD_LO 0Xffffff80ULL -#define VTD_INV_DESC_WAIT_RSVD_HI 3ULL - -/* Masks for Context-cache Invalidation Descriptor */ -#define VTD_INV_DESC_CC_G (3ULL << 4) -#define VTD_INV_DESC_CC_GLOBAL (1ULL << 4) -#define VTD_INV_DESC_CC_DOMAIN (2ULL << 4) -#define VTD_INV_DESC_CC_DEVICE (3ULL << 4) -#define VTD_INV_DESC_CC_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) -#define VTD_INV_DESC_CC_SID(val) (((val) >> 32) & 0xffffUL) -#define VTD_INV_DESC_CC_FM(val) (((val) >> 48) & 3UL) -#define VTD_INV_DESC_CC_RSVD 0xfffc00000000ffc0ULL - -/* Masks for IOTLB Invalidate Descriptor */ -#define VTD_INV_DESC_IOTLB_G (3ULL << 4) -#define VTD_INV_DESC_IOTLB_GLOBAL (1ULL << 4) -#define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4) -#define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4) -#define 
VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) -#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \ - ((1ULL << VTD_MGAW) - 1)) -#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) -#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL -#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL - -/* Information about page-selective IOTLB invalidate */ -struct VTDIOTLBPageInvInfo { - uint16_t domain_id; - uint64_t addr; - uint8_t mask; -}; -typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; - -/* Pagesize of VTD paging structures, including root and context tables */ -#define VTD_PAGE_SHIFT 12 -#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT) - -#define VTD_PAGE_SHIFT_4K 12 -#define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1)) -#define VTD_PAGE_SHIFT_2M 21 -#define VTD_PAGE_MASK_2M (~((1ULL << VTD_PAGE_SHIFT_2M) - 1)) -#define VTD_PAGE_SHIFT_1G 30 -#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1)) - -struct VTDRootEntry { - uint64_t val; - uint64_t rsvd; -}; -typedef struct VTDRootEntry VTDRootEntry; - -/* Masks for struct VTDRootEntry */ -#define VTD_ROOT_ENTRY_P 1ULL -#define VTD_ROOT_ENTRY_CTP (~0xfffULL) - -#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) -#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) - -/* Masks for struct VTDContextEntry */ -/* lo */ -#define VTD_CONTEXT_ENTRY_P (1ULL << 0) -#define VTD_CONTEXT_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ -#define VTD_CONTEXT_ENTRY_TT (3ULL << 2) /* Translation Type */ -#define VTD_CONTEXT_TT_MULTI_LEVEL 0 -#define VTD_CONTEXT_TT_DEV_IOTLB 1 -#define VTD_CONTEXT_TT_PASS_THROUGH 2 -/* Second Level Page Translation Pointer*/ -#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) -#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) -/* hi */ -#define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */ -#define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK) -#define VTD_CONTEXT_ENTRY_RSVD_HI 0xffffffffff000080ULL - 
-#define VTD_CONTEXT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDContextEntry)) - -/* Paging Structure common */ -#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) -/* Bits to decide the offset for each level */ -#define VTD_SL_LEVEL_BITS 9 - -/* Second Level Paging Structure */ -#define VTD_SL_PML4_LEVEL 4 -#define VTD_SL_PDP_LEVEL 3 -#define VTD_SL_PD_LEVEL 2 -#define VTD_SL_PT_LEVEL 1 -#define VTD_SL_PT_ENTRY_NR 512 - -/* Masks for Second Level Paging Entry */ -#define VTD_SL_RW_MASK 3ULL -#define VTD_SL_R 1ULL -#define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) -#define VTD_SL_IGN_COM 0xbff0000000000000ULL - -#endif diff --git a/qemu/hw/i386/kvm/Makefile.objs b/qemu/hw/i386/kvm/Makefile.objs deleted file mode 100644 index d8bce209b..000000000 --- a/qemu/hw/i386/kvm/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -obj-y += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o diff --git a/qemu/hw/i386/kvm/apic.c b/qemu/hw/i386/kvm/apic.c deleted file mode 100644 index 3c7c8fa00..000000000 --- a/qemu/hw/i386/kvm/apic.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * KVM in-kernel APIC support - * - * Copyright (c) 2011 Siemens AG - * - * Authors: - * Jan Kiszka <jan.kiszka@siemens.com> - * - * This work is licensed under the terms of the GNU GPL version 2. - * See the COPYING file in the top-level directory. 
- */ -#include "qemu/osdep.h" -#include "hw/i386/apic_internal.h" -#include "hw/pci/msi.h" -#include "sysemu/kvm.h" - -static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, - int reg_id, uint32_t val) -{ - *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; -} - -static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, - int reg_id) -{ - return *((uint32_t *)(kapic->regs + (reg_id << 4))); -} - -void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) -{ - APICCommonState *s = APIC_COMMON(dev); - int i; - - memset(kapic, 0, sizeof(*kapic)); - kvm_apic_set_reg(kapic, 0x2, s->id << 24); - kvm_apic_set_reg(kapic, 0x8, s->tpr); - kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24); - kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); - kvm_apic_set_reg(kapic, 0xf, s->spurious_vec); - for (i = 0; i < 8; i++) { - kvm_apic_set_reg(kapic, 0x10 + i, s->isr[i]); - kvm_apic_set_reg(kapic, 0x18 + i, s->tmr[i]); - kvm_apic_set_reg(kapic, 0x20 + i, s->irr[i]); - } - kvm_apic_set_reg(kapic, 0x28, s->esr); - kvm_apic_set_reg(kapic, 0x30, s->icr[0]); - kvm_apic_set_reg(kapic, 0x31, s->icr[1]); - for (i = 0; i < APIC_LVT_NB; i++) { - kvm_apic_set_reg(kapic, 0x32 + i, s->lvt[i]); - } - kvm_apic_set_reg(kapic, 0x38, s->initial_count); - kvm_apic_set_reg(kapic, 0x3e, s->divide_conf); -} - -void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) -{ - APICCommonState *s = APIC_COMMON(dev); - int i, v; - - s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; - s->tpr = kvm_apic_get_reg(kapic, 0x8); - s->arb_id = kvm_apic_get_reg(kapic, 0x9); - s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24; - s->dest_mode = kvm_apic_get_reg(kapic, 0xe) >> 28; - s->spurious_vec = kvm_apic_get_reg(kapic, 0xf); - for (i = 0; i < 8; i++) { - s->isr[i] = kvm_apic_get_reg(kapic, 0x10 + i); - s->tmr[i] = kvm_apic_get_reg(kapic, 0x18 + i); - s->irr[i] = kvm_apic_get_reg(kapic, 0x20 + i); - } - s->esr = kvm_apic_get_reg(kapic, 0x28); - s->icr[0] = 
kvm_apic_get_reg(kapic, 0x30); - s->icr[1] = kvm_apic_get_reg(kapic, 0x31); - for (i = 0; i < APIC_LVT_NB; i++) { - s->lvt[i] = kvm_apic_get_reg(kapic, 0x32 + i); - } - s->initial_count = kvm_apic_get_reg(kapic, 0x38); - s->divide_conf = kvm_apic_get_reg(kapic, 0x3e); - - v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); - s->count_shift = (v + 1) & 7; - - s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - apic_next_timer(s, s->initial_count_load_time); -} - -static void kvm_apic_set_base(APICCommonState *s, uint64_t val) -{ - s->apicbase = val; -} - -static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val) -{ - s->tpr = (val & 0x0f) << 4; -} - -static uint8_t kvm_apic_get_tpr(APICCommonState *s) -{ - return s->tpr >> 4; -} - -static void kvm_apic_enable_tpr_reporting(APICCommonState *s, bool enable) -{ - struct kvm_tpr_access_ctl ctl = { - .enabled = enable - }; - - kvm_vcpu_ioctl(CPU(s->cpu), KVM_TPR_ACCESS_REPORTING, &ctl); -} - -static void kvm_apic_vapic_base_update(APICCommonState *s) -{ - struct kvm_vapic_addr vapid_addr = { - .vapic_addr = s->vapic_paddr, - }; - int ret; - - ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_VAPIC_ADDR, &vapid_addr); - if (ret < 0) { - fprintf(stderr, "KVM: setting VAPIC address failed (%s)\n", - strerror(-ret)); - abort(); - } -} - -static void do_inject_external_nmi(void *data) -{ - APICCommonState *s = data; - CPUState *cpu = CPU(s->cpu); - uint32_t lvt; - int ret; - - cpu_synchronize_state(cpu); - - lvt = s->lvt[APIC_LVT_LINT1]; - if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { - ret = kvm_vcpu_ioctl(cpu, KVM_NMI); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", - strerror(-ret)); - } - } -} - -static void kvm_apic_external_nmi(APICCommonState *s) -{ - run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s); -} - -static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr, - unsigned size) -{ - return ~(uint64_t)0; -} - -static void kvm_apic_mem_write(void 
*opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - MSIMessage msg = { .address = addr, .data = data }; - int ret; - - ret = kvm_irqchip_send_msi(kvm_state, msg); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", - strerror(-ret)); - } -} - -static const MemoryRegionOps kvm_apic_io_ops = { - .read = kvm_apic_mem_read, - .write = kvm_apic_mem_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void kvm_apic_reset(APICCommonState *s) -{ - /* Not used by KVM, which uses the CPU mp_state instead. */ - s->wait_for_sipi = 0; -} - -static void kvm_apic_realize(DeviceState *dev, Error **errp) -{ - APICCommonState *s = APIC_COMMON(dev); - - memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", - APIC_SPACE_SIZE); - - if (kvm_has_gsi_routing()) { - msi_nonbroken = true; - } -} - -static void kvm_apic_class_init(ObjectClass *klass, void *data) -{ - APICCommonClass *k = APIC_COMMON_CLASS(klass); - - k->realize = kvm_apic_realize; - k->reset = kvm_apic_reset; - k->set_base = kvm_apic_set_base; - k->set_tpr = kvm_apic_set_tpr; - k->get_tpr = kvm_apic_get_tpr; - k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting; - k->vapic_base_update = kvm_apic_vapic_base_update; - k->external_nmi = kvm_apic_external_nmi; -} - -static const TypeInfo kvm_apic_info = { - .name = "kvm-apic", - .parent = TYPE_APIC_COMMON, - .instance_size = sizeof(APICCommonState), - .class_init = kvm_apic_class_init, -}; - -static void kvm_apic_register_types(void) -{ - type_register_static(&kvm_apic_info); -} - -type_init(kvm_apic_register_types) diff --git a/qemu/hw/i386/kvm/clock.c b/qemu/hw/i386/kvm/clock.c deleted file mode 100644 index a3b300cad..000000000 --- a/qemu/hw/i386/kvm/clock.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * QEMU KVM support, paravirtual clock device - * - * Copyright (C) 2011 Siemens AG - * - * Authors: - * Jan Kiszka <jan.kiszka@siemens.com> - * - * This work is licensed under the terms of the GNU GPL version 2. 
- * See the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/host-utils.h" -#include "sysemu/sysemu.h" -#include "sysemu/kvm.h" -#include "kvm_i386.h" -#include "hw/sysbus.h" -#include "hw/kvm/clock.h" - -#include <linux/kvm.h> -#include <linux/kvm_para.h> - -#define TYPE_KVM_CLOCK "kvmclock" -#define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK) - -typedef struct KVMClockState { - /*< private >*/ - SysBusDevice busdev; - /*< public >*/ - - uint64_t clock; - bool clock_valid; -} KVMClockState; - -struct pvclock_vcpu_time_info { - uint32_t version; - uint32_t pad0; - uint64_t tsc_timestamp; - uint64_t system_time; - uint32_t tsc_to_system_mul; - int8_t tsc_shift; - uint8_t flags; - uint8_t pad[2]; -} __attribute__((__packed__)); /* 32 bytes */ - -static uint64_t kvmclock_current_nsec(KVMClockState *s) -{ - CPUState *cpu = first_cpu; - CPUX86State *env = cpu->env_ptr; - hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; - uint64_t migration_tsc = env->tsc; - struct pvclock_vcpu_time_info time; - uint64_t delta; - uint64_t nsec_lo; - uint64_t nsec_hi; - uint64_t nsec; - - if (!(env->system_time_msr & 1ULL)) { - /* KVM clock not active */ - return 0; - } - - cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); - - assert(time.tsc_timestamp <= migration_tsc); - delta = migration_tsc - time.tsc_timestamp; - if (time.tsc_shift < 0) { - delta >>= -time.tsc_shift; - } else { - delta <<= time.tsc_shift; - } - - mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); - nsec = (nsec_lo >> 32) | (nsec_hi << 32); - return nsec + time.system_time; -} - -static void kvmclock_vm_state_change(void *opaque, int running, - RunState state) -{ - KVMClockState *s = opaque; - CPUState *cpu; - int cap_clock_ctrl = kvm_check_extension(kvm_state, 
KVM_CAP_KVMCLOCK_CTRL); - int ret; - - if (running) { - struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); - - s->clock_valid = false; - - /* We can't rely on the migrated clock value, just discard it */ - if (time_at_migration) { - s->clock = time_at_migration; - } - - data.clock = s->clock; - ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - - if (!cap_clock_ctrl) { - return; - } - CPU_FOREACH(cpu) { - ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0); - if (ret) { - if (ret != -EINVAL) { - fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); - } - return; - } - } - } else { - struct kvm_clock_data data; - int ret; - - if (s->clock_valid) { - return; - } - - kvm_synchronize_all_tsc(); - - ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - s->clock = data.clock; - - /* - * If the VM is stopped, declare the clock state valid to - * avoid re-reading it on next vmsave (which would return - * a different value). Will be reset when the VM is continued. 
- */ - s->clock_valid = true; - } -} - -static void kvmclock_realize(DeviceState *dev, Error **errp) -{ - KVMClockState *s = KVM_CLOCK(dev); - - qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); -} - -static const VMStateDescription kvmclock_vmsd = { - .name = "kvmclock", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT64(clock, KVMClockState), - VMSTATE_END_OF_LIST() - } -}; - -static void kvmclock_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = kvmclock_realize; - dc->vmsd = &kvmclock_vmsd; -} - -static const TypeInfo kvmclock_info = { - .name = TYPE_KVM_CLOCK, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(KVMClockState), - .class_init = kvmclock_class_init, -}; - -/* Note: Must be called after VCPU initialization. */ -void kvmclock_create(void) -{ - X86CPU *cpu = X86_CPU(first_cpu); - - if (kvm_enabled() && - cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) | - (1ULL << KVM_FEATURE_CLOCKSOURCE2))) { - sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL); - } -} - -static void kvmclock_register_types(void) -{ - type_register_static(&kvmclock_info); -} - -type_init(kvmclock_register_types) diff --git a/qemu/hw/i386/kvm/i8254.c b/qemu/hw/i386/kvm/i8254.c deleted file mode 100644 index a4462e5ca..000000000 --- a/qemu/hw/i386/kvm/i8254.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * KVM in-kernel PIT (i8254) support - * - * Copyright (c) 2003-2004 Fabrice Bellard - * Copyright (c) 2012 Jan Kiszka, Siemens AG - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu/timer.h" -#include "sysemu/sysemu.h" -#include "hw/timer/i8254.h" -#include "hw/timer/i8254_internal.h" -#include "sysemu/kvm.h" - -#define KVM_PIT_REINJECT_BIT 0 - -#define CALIBRATION_ROUNDS 3 - -#define KVM_PIT(obj) OBJECT_CHECK(KVMPITState, (obj), TYPE_KVM_I8254) -#define KVM_PIT_CLASS(class) \ - OBJECT_CLASS_CHECK(KVMPITClass, (class), TYPE_KVM_I8254) -#define KVM_PIT_GET_CLASS(obj) \ - OBJECT_GET_CLASS(KVMPITClass, (obj), TYPE_KVM_I8254) - -typedef struct KVMPITState { - PITCommonState parent_obj; - - LostTickPolicy lost_tick_policy; - bool vm_stopped; - int64_t kernel_clock_offset; -} KVMPITState; - -typedef struct KVMPITClass { - PITCommonClass parent_class; - - DeviceRealize parent_realize; -} KVMPITClass; - -static int64_t abs64(int64_t v) -{ - return v < 0 ? -v : v; -} - -static void kvm_pit_update_clock_offset(KVMPITState *s) -{ - int64_t offset, clock_offset; - struct timespec ts; - int i; - - /* - * Measure the delta between CLOCK_MONOTONIC, the base used for - * kvm_pit_channel_state::count_load_time, and QEMU_CLOCK_VIRTUAL. Take the - * minimum of several samples to filter out scheduling noise. 
- */ - clock_offset = INT64_MAX; - for (i = 0; i < CALIBRATION_ROUNDS; i++) { - offset = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - clock_gettime(CLOCK_MONOTONIC, &ts); - offset -= ts.tv_nsec; - offset -= (int64_t)ts.tv_sec * 1000000000; - if (abs64(offset) < abs64(clock_offset)) { - clock_offset = offset; - } - } - s->kernel_clock_offset = clock_offset; -} - -static void kvm_pit_get(PITCommonState *pit) -{ - KVMPITState *s = KVM_PIT(pit); - struct kvm_pit_state2 kpit; - struct kvm_pit_channel_state *kchan; - struct PITChannelState *sc; - int i, ret; - - /* No need to re-read the state if VM is stopped. */ - if (s->vm_stopped) { - return; - } - - if (kvm_has_pit_state2()) { - ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit); - if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(ret)); - abort(); - } - pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY; - } else { - /* - * kvm_pit_state2 is superset of kvm_pit_state struct, - * so we can use it for KVM_GET_PIT as well. 
- */ - ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT, &kpit); - if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(ret)); - abort(); - } - } - for (i = 0; i < 3; i++) { - kchan = &kpit.channels[i]; - sc = &pit->channels[i]; - sc->count = kchan->count; - sc->latched_count = kchan->latched_count; - sc->count_latched = kchan->count_latched; - sc->status_latched = kchan->status_latched; - sc->status = kchan->status; - sc->read_state = kchan->read_state; - sc->write_state = kchan->write_state; - sc->write_latch = kchan->write_latch; - sc->rw_mode = kchan->rw_mode; - sc->mode = kchan->mode; - sc->bcd = kchan->bcd; - sc->gate = kchan->gate; - sc->count_load_time = kchan->count_load_time + s->kernel_clock_offset; - } - - sc = &pit->channels[0]; - sc->next_transition_time = - pit_get_next_transition_time(sc, sc->count_load_time); -} - -static void kvm_pit_put(PITCommonState *pit) -{ - KVMPITState *s = KVM_PIT(pit); - struct kvm_pit_state2 kpit = {}; - struct kvm_pit_channel_state *kchan; - struct PITChannelState *sc; - int i, ret; - - /* The offset keeps changing as long as the VM is stopped. */ - if (s->vm_stopped) { - kvm_pit_update_clock_offset(s); - } - - kpit.flags = pit->channels[0].irq_disabled ? KVM_PIT_FLAGS_HPET_LEGACY : 0; - for (i = 0; i < 3; i++) { - kchan = &kpit.channels[i]; - sc = &pit->channels[i]; - kchan->count = sc->count; - kchan->latched_count = sc->latched_count; - kchan->count_latched = sc->count_latched; - kchan->status_latched = sc->status_latched; - kchan->status = sc->status; - kchan->read_state = sc->read_state; - kchan->write_state = sc->write_state; - kchan->write_latch = sc->write_latch; - kchan->rw_mode = sc->rw_mode; - kchan->mode = sc->mode; - kchan->bcd = sc->bcd; - kchan->gate = sc->gate; - kchan->count_load_time = sc->count_load_time - s->kernel_clock_offset; - } - - ret = kvm_vm_ioctl(kvm_state, - kvm_has_pit_state2() ? 
KVM_SET_PIT2 : KVM_SET_PIT, - &kpit); - if (ret < 0) { - fprintf(stderr, "%s failed: %s\n", - kvm_has_pit_state2() ? "KVM_SET_PIT2" : "KVM_SET_PIT", - strerror(ret)); - abort(); - } -} - -static void kvm_pit_set_gate(PITCommonState *s, PITChannelState *sc, int val) -{ - kvm_pit_get(s); - - switch (sc->mode) { - default: - case 0: - case 4: - /* XXX: just disable/enable counting */ - break; - case 1: - case 2: - case 3: - case 5: - if (sc->gate < val) { - /* restart counting on rising edge */ - sc->count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - } - break; - } - sc->gate = val; - - kvm_pit_put(s); -} - -static void kvm_pit_get_channel_info(PITCommonState *s, PITChannelState *sc, - PITChannelInfo *info) -{ - kvm_pit_get(s); - - pit_get_channel_info_common(s, sc, info); -} - -static void kvm_pit_reset(DeviceState *dev) -{ - PITCommonState *s = PIT_COMMON(dev); - - pit_reset_common(s); - - kvm_pit_put(s); -} - -static void kvm_pit_irq_control(void *opaque, int n, int enable) -{ - PITCommonState *pit = opaque; - PITChannelState *s = &pit->channels[0]; - - kvm_pit_get(pit); - - s->irq_disabled = !enable; - - kvm_pit_put(pit); -} - -static void kvm_pit_vm_state_change(void *opaque, int running, - RunState state) -{ - KVMPITState *s = opaque; - - if (running) { - kvm_pit_update_clock_offset(s); - kvm_pit_put(PIT_COMMON(s)); - s->vm_stopped = false; - } else { - kvm_pit_update_clock_offset(s); - kvm_pit_get(PIT_COMMON(s)); - s->vm_stopped = true; - } -} - -static void kvm_pit_realizefn(DeviceState *dev, Error **errp) -{ - PITCommonState *pit = PIT_COMMON(dev); - KVMPITClass *kpc = KVM_PIT_GET_CLASS(dev); - KVMPITState *s = KVM_PIT(pit); - struct kvm_pit_config config = { - .flags = 0, - }; - int ret; - - if (kvm_check_extension(kvm_state, KVM_CAP_PIT2)) { - ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT2, &config); - } else { - ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); - } - if (ret < 0) { - error_setg(errp, "Create kernel PIC irqchip failed: %s", - 
strerror(ret)); - return; - } - switch (s->lost_tick_policy) { - case LOST_TICK_POLICY_DELAY: - break; /* enabled by default */ - case LOST_TICK_POLICY_DISCARD: - if (kvm_check_extension(kvm_state, KVM_CAP_REINJECT_CONTROL)) { - struct kvm_reinject_control control = { .pit_reinject = 0 }; - - ret = kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control); - if (ret < 0) { - error_setg(errp, - "Can't disable in-kernel PIT reinjection: %s", - strerror(ret)); - return; - } - } - break; - default: - error_setg(errp, "Lost tick policy not supported."); - return; - } - - memory_region_init_reservation(&pit->ioports, NULL, "kvm-pit", 4); - - qdev_init_gpio_in(dev, kvm_pit_irq_control, 1); - - qemu_add_vm_change_state_handler(kvm_pit_vm_state_change, s); - - kpc->parent_realize(dev, errp); -} - -static Property kvm_pit_properties[] = { - DEFINE_PROP_UINT32("iobase", PITCommonState, iobase, -1), - DEFINE_PROP_LOSTTICKPOLICY("lost_tick_policy", KVMPITState, - lost_tick_policy, LOST_TICK_POLICY_DELAY), - DEFINE_PROP_END_OF_LIST(), -}; - -static void kvm_pit_class_init(ObjectClass *klass, void *data) -{ - KVMPITClass *kpc = KVM_PIT_CLASS(klass); - PITCommonClass *k = PIT_COMMON_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - kpc->parent_realize = dc->realize; - dc->realize = kvm_pit_realizefn; - k->set_channel_gate = kvm_pit_set_gate; - k->get_channel_info = kvm_pit_get_channel_info; - dc->reset = kvm_pit_reset; - dc->props = kvm_pit_properties; -} - -static const TypeInfo kvm_pit_info = { - .name = TYPE_KVM_I8254, - .parent = TYPE_PIT_COMMON, - .instance_size = sizeof(KVMPITState), - .class_init = kvm_pit_class_init, - .class_size = sizeof(KVMPITClass), -}; - -static void kvm_pit_register(void) -{ - type_register_static(&kvm_pit_info); -} - -type_init(kvm_pit_register) diff --git a/qemu/hw/i386/kvm/i8259.c b/qemu/hw/i386/kvm/i8259.c deleted file mode 100644 index 2b207de01..000000000 --- a/qemu/hw/i386/kvm/i8259.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * KVM 
in-kernel PIC (i8259) support - * - * Copyright (c) 2011 Siemens AG - * - * Authors: - * Jan Kiszka <jan.kiszka@siemens.com> - * - * This work is licensed under the terms of the GNU GPL version 2. - * See the COPYING file in the top-level directory. - */ -#include "qemu/osdep.h" -#include "hw/isa/i8259_internal.h" -#include "hw/i386/apic_internal.h" -#include "sysemu/kvm.h" - -#define TYPE_KVM_I8259 "kvm-i8259" -#define KVM_PIC_CLASS(class) \ - OBJECT_CLASS_CHECK(KVMPICClass, (class), TYPE_KVM_I8259) -#define KVM_PIC_GET_CLASS(obj) \ - OBJECT_GET_CLASS(KVMPICClass, (obj), TYPE_KVM_I8259) - -/** - * KVMPICClass: - * @parent_realize: The parent's realizefn. - */ -typedef struct KVMPICClass { - PICCommonClass parent_class; - - DeviceRealize parent_realize; -} KVMPICClass; - -static void kvm_pic_get(PICCommonState *s) -{ - struct kvm_irqchip chip; - struct kvm_pic_state *kpic; - int ret; - - chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; - ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); - if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); - abort(); - } - - kpic = &chip.chip.pic; - - s->last_irr = kpic->last_irr; - s->irr = kpic->irr; - s->imr = kpic->imr; - s->isr = kpic->isr; - s->priority_add = kpic->priority_add; - s->irq_base = kpic->irq_base; - s->read_reg_select = kpic->read_reg_select; - s->poll = kpic->poll; - s->special_mask = kpic->special_mask; - s->init_state = kpic->init_state; - s->auto_eoi = kpic->auto_eoi; - s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi; - s->special_fully_nested_mode = kpic->special_fully_nested_mode; - s->init4 = kpic->init4; - s->elcr = kpic->elcr; - s->elcr_mask = kpic->elcr_mask; -} - -static void kvm_pic_put(PICCommonState *s) -{ - struct kvm_irqchip chip; - struct kvm_pic_state *kpic; - int ret; - - chip.chip_id = s->master ? 
KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; - - kpic = &chip.chip.pic; - - kpic->last_irr = s->last_irr; - kpic->irr = s->irr; - kpic->imr = s->imr; - kpic->isr = s->isr; - kpic->priority_add = s->priority_add; - kpic->irq_base = s->irq_base; - kpic->read_reg_select = s->read_reg_select; - kpic->poll = s->poll; - kpic->special_mask = s->special_mask; - kpic->init_state = s->init_state; - kpic->auto_eoi = s->auto_eoi; - kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi; - kpic->special_fully_nested_mode = s->special_fully_nested_mode; - kpic->init4 = s->init4; - kpic->elcr = s->elcr; - kpic->elcr_mask = s->elcr_mask; - - ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); - if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); - abort(); - } -} - -static void kvm_pic_reset(DeviceState *dev) -{ - PICCommonState *s = PIC_COMMON(dev); - - s->elcr = 0; - pic_reset_common(s); - - kvm_pic_put(s); -} - -static void kvm_pic_set_irq(void *opaque, int irq, int level) -{ - int delivered; - - delivered = kvm_set_irq(kvm_state, irq, level); - apic_report_irq_delivered(delivered); -} - -static void kvm_pic_realize(DeviceState *dev, Error **errp) -{ - PICCommonState *s = PIC_COMMON(dev); - KVMPICClass *kpc = KVM_PIC_GET_CLASS(dev); - - memory_region_init_reservation(&s->base_io, NULL, "kvm-pic", 2); - memory_region_init_reservation(&s->elcr_io, NULL, "kvm-elcr", 1); - - kpc->parent_realize(dev, errp); -} - -qemu_irq *kvm_i8259_init(ISABus *bus) -{ - i8259_init_chip(TYPE_KVM_I8259, bus, true); - i8259_init_chip(TYPE_KVM_I8259, bus, false); - - return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS); -} - -static void kvm_i8259_class_init(ObjectClass *klass, void *data) -{ - KVMPICClass *kpc = KVM_PIC_CLASS(klass); - PICCommonClass *k = PIC_COMMON_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = kvm_pic_reset; - kpc->parent_realize = dc->realize; - dc->realize = kvm_pic_realize; - k->pre_save = kvm_pic_get; - k->post_load 
= kvm_pic_put; -} - -static const TypeInfo kvm_i8259_info = { - .name = TYPE_KVM_I8259, - .parent = TYPE_PIC_COMMON, - .instance_size = sizeof(PICCommonState), - .class_init = kvm_i8259_class_init, - .class_size = sizeof(KVMPICClass), -}; - -static void kvm_pic_register_types(void) -{ - type_register_static(&kvm_i8259_info); -} - -type_init(kvm_pic_register_types) diff --git a/qemu/hw/i386/kvm/ioapic.c b/qemu/hw/i386/kvm/ioapic.c deleted file mode 100644 index 8eb2c7a70..000000000 --- a/qemu/hw/i386/kvm/ioapic.c +++ /dev/null @@ -1,179 +0,0 @@ -/* - * KVM in-kernel IOPIC support - * - * Copyright (c) 2011 Siemens AG - * - * Authors: - * Jan Kiszka <jan.kiszka@siemens.com> - * - * This work is licensed under the terms of the GNU GPL version 2. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "monitor/monitor.h" -#include "hw/i386/pc.h" -#include "hw/i386/ioapic_internal.h" -#include "hw/i386/apic_internal.h" -#include "sysemu/kvm.h" - -/* PC Utility function */ -void kvm_pc_setup_irq_routing(bool pci_enabled) -{ - KVMState *s = kvm_state; - int i; - - if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { - for (i = 0; i < 8; ++i) { - if (i == 2) { - continue; - } - kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); - } - for (i = 8; i < 16; ++i) { - kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); - } - if (pci_enabled) { - for (i = 0; i < 24; ++i) { - if (i == 0) { - kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); - } else if (i != 2) { - kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); - } - } - } - kvm_irqchip_commit_routes(s); - } -} - -void kvm_pc_gsi_handler(void *opaque, int n, int level) -{ - GSIState *s = opaque; - - if (n < ISA_NUM_IRQS) { - /* Kernel will forward to both PIC and IOAPIC */ - qemu_set_irq(s->i8259_irq[n], level); - } else { - qemu_set_irq(s->ioapic_irq[n], level); - } -} - -typedef struct KVMIOAPICState KVMIOAPICState; - -struct KVMIOAPICState { - 
IOAPICCommonState ioapic; - uint32_t kvm_gsi_base; -}; - -static void kvm_ioapic_get(IOAPICCommonState *s) -{ - struct kvm_irqchip chip; - struct kvm_ioapic_state *kioapic; - int ret, i; - - chip.chip_id = KVM_IRQCHIP_IOAPIC; - ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); - if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); - abort(); - } - - kioapic = &chip.chip.ioapic; - - s->id = kioapic->id; - s->ioregsel = kioapic->ioregsel; - s->irr = kioapic->irr; - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - s->ioredtbl[i] = kioapic->redirtbl[i].bits; - } -} - -static void kvm_ioapic_put(IOAPICCommonState *s) -{ - struct kvm_irqchip chip; - struct kvm_ioapic_state *kioapic; - int ret, i; - - chip.chip_id = KVM_IRQCHIP_IOAPIC; - kioapic = &chip.chip.ioapic; - - kioapic->id = s->id; - kioapic->ioregsel = s->ioregsel; - kioapic->base_address = s->busdev.mmio[0].addr; - kioapic->irr = s->irr; - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - kioapic->redirtbl[i].bits = s->ioredtbl[i]; - } - - ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); - if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); - abort(); - } -} - -void kvm_ioapic_dump_state(Monitor *mon, const QDict *qdict) -{ - IOAPICCommonState s; - - kvm_ioapic_get(&s); - - ioapic_print_redtbl(mon, &s); -} - -static void kvm_ioapic_reset(DeviceState *dev) -{ - IOAPICCommonState *s = IOAPIC_COMMON(dev); - - ioapic_reset_common(dev); - kvm_ioapic_put(s); -} - -static void kvm_ioapic_set_irq(void *opaque, int irq, int level) -{ - KVMIOAPICState *s = opaque; - int delivered; - - delivered = kvm_set_irq(kvm_state, s->kvm_gsi_base + irq, level); - apic_report_irq_delivered(delivered); -} - -static void kvm_ioapic_realize(DeviceState *dev, Error **errp) -{ - IOAPICCommonState *s = IOAPIC_COMMON(dev); - - memory_region_init_reservation(&s->io_memory, NULL, "kvm-ioapic", 0x1000); - - qdev_init_gpio_in(dev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS); -} - -static Property 
kvm_ioapic_properties[] = { - DEFINE_PROP_UINT32("gsi_base", KVMIOAPICState, kvm_gsi_base, 0), - DEFINE_PROP_END_OF_LIST() -}; - -static void kvm_ioapic_class_init(ObjectClass *klass, void *data) -{ - IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - k->realize = kvm_ioapic_realize; - k->pre_save = kvm_ioapic_get; - k->post_load = kvm_ioapic_put; - dc->reset = kvm_ioapic_reset; - dc->props = kvm_ioapic_properties; -} - -static const TypeInfo kvm_ioapic_info = { - .name = "kvm-ioapic", - .parent = TYPE_IOAPIC_COMMON, - .instance_size = sizeof(KVMIOAPICState), - .class_init = kvm_ioapic_class_init, -}; - -static void kvm_ioapic_register_types(void) -{ - type_register_static(&kvm_ioapic_info); -} - -type_init(kvm_ioapic_register_types) diff --git a/qemu/hw/i386/kvm/pci-assign.c b/qemu/hw/i386/kvm/pci-assign.c deleted file mode 100644 index bf425a2b9..000000000 --- a/qemu/hw/i386/kvm/pci-assign.c +++ /dev/null @@ -1,1898 +0,0 @@ -/* - * Copyright (c) 2007, Neocleus Corporation. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * - * Assign a PCI device from the host to a guest VM. - * - * This implementation uses the classic device assignment interface of KVM - * and is only available on x86 hosts. It is expected to be obsoleted by VFIO - * based device assignment. - * - * Adapted for KVM (qemu-kvm) by Qumranet. QEMU version was based on qemu-kvm - * revision 4144fe9d48. See its repository for the history. 
- * - * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) - * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) - * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) - * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) - * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) - */ -#include "qemu/osdep.h" -#include "qapi/error.h" -#include <sys/mman.h> -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "qemu/error-report.h" -#include "ui/console.h" -#include "hw/loader.h" -#include "monitor/monitor.h" -#include "qemu/range.h" -#include "sysemu/sysemu.h" -#include "hw/pci/pci.h" -#include "hw/pci/msi.h" -#include "kvm_i386.h" -#include "hw/pci/pci-assign.h" - -#define MSIX_PAGE_SIZE 0x1000 - -/* From linux/ioport.h */ -#define IORESOURCE_IO 0x00000100 /* Resource type */ -#define IORESOURCE_MEM 0x00000200 -#define IORESOURCE_IRQ 0x00000400 -#define IORESOURCE_DMA 0x00000800 -#define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ -#define IORESOURCE_MEM_64 0x00100000 - -typedef struct PCIRegion { - int type; /* Memory or port I/O */ - int valid; - uint64_t base_addr; - uint64_t size; /* size of the region */ - int resource_fd; -} PCIRegion; - -typedef struct PCIDevRegions { - uint8_t bus, dev, func; /* Bus inside domain, device and function */ - int irq; /* IRQ number */ - uint16_t region_number; /* number of active regions */ - - /* Port I/O or MMIO Regions */ - PCIRegion regions[PCI_NUM_REGIONS - 1]; - int config_fd; -} PCIDevRegions; - -typedef struct AssignedDevRegion { - MemoryRegion container; - MemoryRegion real_iomem; - union { - uint8_t *r_virtbase; /* mmapped access address for memory regions */ - uint32_t r_baseport; /* the base guest port for I/O regions */ - } u; - pcibus_t e_size; /* emulated size of region in bytes */ - pcibus_t r_size; /* real size of region in bytes */ - PCIRegion *region; -} AssignedDevRegion; - -#define ASSIGNED_DEVICE_PREFER_MSI_BIT 0 -#define 
ASSIGNED_DEVICE_SHARE_INTX_BIT 1 - -#define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT) -#define ASSIGNED_DEVICE_SHARE_INTX_MASK (1 << ASSIGNED_DEVICE_SHARE_INTX_BIT) - -typedef struct MSIXTableEntry { - uint32_t addr_lo; - uint32_t addr_hi; - uint32_t data; - uint32_t ctrl; -} MSIXTableEntry; - -typedef enum AssignedIRQType { - ASSIGNED_IRQ_NONE = 0, - ASSIGNED_IRQ_INTX_HOST_INTX, - ASSIGNED_IRQ_INTX_HOST_MSI, - ASSIGNED_IRQ_MSI, - ASSIGNED_IRQ_MSIX -} AssignedIRQType; - -typedef struct AssignedDevice { - PCIDevice dev; - PCIHostDeviceAddress host; - uint32_t dev_id; - uint32_t features; - int intpin; - AssignedDevRegion v_addrs[PCI_NUM_REGIONS - 1]; - PCIDevRegions real_device; - PCIINTxRoute intx_route; - AssignedIRQType assigned_irq_type; - struct { -#define ASSIGNED_DEVICE_CAP_MSI (1 << 0) -#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1) - uint32_t available; -#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0) -#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1) -#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) - uint32_t state; - } cap; - uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE]; - uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE]; - int msi_virq_nr; - int *msi_virq; - MSIXTableEntry *msix_table; - hwaddr msix_table_addr; - uint16_t msix_max; - MemoryRegion mmio; - char *configfd_name; - int32_t bootindex; -} AssignedDevice; - -#define TYPE_PCI_ASSIGN "kvm-pci-assign" -#define PCI_ASSIGN(obj) OBJECT_CHECK(AssignedDevice, (obj), TYPE_PCI_ASSIGN) - -static void assigned_dev_update_irq_routing(PCIDevice *dev); - -static void assigned_dev_load_option_rom(AssignedDevice *dev); - -static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); - -static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, - hwaddr addr, int size, - uint64_t *data) -{ - uint64_t val = 0; - int fd = dev_region->region->resource_fd; - - if (data) { - DEBUG("pwrite data=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx - ", addr="TARGET_FMT_plx"\n", *data, 
size, addr, addr); - if (pwrite(fd, data, size, addr) != size) { - error_report("%s - pwrite failed %s", __func__, strerror(errno)); - } - } else { - if (pread(fd, &val, size, addr) != size) { - error_report("%s - pread failed %s", __func__, strerror(errno)); - val = (1UL << (size * 8)) - 1; - } - DEBUG("pread val=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx - ", addr=" TARGET_FMT_plx "\n", val, size, addr, addr); - } - return val; -} - -static void assigned_dev_ioport_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - assigned_dev_ioport_rw(opaque, addr, size, &data); -} - -static uint64_t assigned_dev_ioport_read(void *opaque, - hwaddr addr, unsigned size) -{ - return assigned_dev_ioport_rw(opaque, addr, size, NULL); -} - -static uint32_t slow_bar_readb(void *opaque, hwaddr addr) -{ - AssignedDevRegion *d = opaque; - uint8_t *in = d->u.r_virtbase + addr; - uint32_t r; - - r = *in; - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); - - return r; -} - -static uint32_t slow_bar_readw(void *opaque, hwaddr addr) -{ - AssignedDevRegion *d = opaque; - uint16_t *in = (uint16_t *)(d->u.r_virtbase + addr); - uint32_t r; - - r = *in; - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); - - return r; -} - -static uint32_t slow_bar_readl(void *opaque, hwaddr addr) -{ - AssignedDevRegion *d = opaque; - uint32_t *in = (uint32_t *)(d->u.r_virtbase + addr); - uint32_t r; - - r = *in; - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); - - return r; -} - -static void slow_bar_writeb(void *opaque, hwaddr addr, uint32_t val) -{ - AssignedDevRegion *d = opaque; - uint8_t *out = d->u.r_virtbase + addr; - - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val); - *out = val; -} - -static void slow_bar_writew(void *opaque, hwaddr addr, uint32_t val) -{ - AssignedDevRegion *d = opaque; - uint16_t *out = (uint16_t *)(d->u.r_virtbase + addr); - - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val); - *out = val; -} - -static void 
slow_bar_writel(void *opaque, hwaddr addr, uint32_t val) -{ - AssignedDevRegion *d = opaque; - uint32_t *out = (uint32_t *)(d->u.r_virtbase + addr); - - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val); - *out = val; -} - -static const MemoryRegionOps slow_bar_ops = { - .old_mmio = { - .read = { slow_bar_readb, slow_bar_readw, slow_bar_readl, }, - .write = { slow_bar_writeb, slow_bar_writew, slow_bar_writel, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num, - pcibus_t e_size) -{ - AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); - AssignedDevRegion *region = &r_dev->v_addrs[region_num]; - PCIRegion *real_region = &r_dev->real_device.regions[region_num]; - - if (e_size > 0) { - memory_region_init(®ion->container, OBJECT(pci_dev), - "assigned-dev-container", e_size); - memory_region_add_subregion(®ion->container, 0, ®ion->real_iomem); - - /* deal with MSI-X MMIO page */ - if (real_region->base_addr <= r_dev->msix_table_addr && - real_region->base_addr + real_region->size > - r_dev->msix_table_addr) { - uint64_t offset = r_dev->msix_table_addr - real_region->base_addr; - - memory_region_add_subregion_overlap(®ion->container, - offset, - &r_dev->mmio, - 1); - } - } -} - -static const MemoryRegionOps assigned_dev_ioport_ops = { - .read = assigned_dev_ioport_read, - .write = assigned_dev_ioport_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num, - pcibus_t size) -{ - AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); - AssignedDevRegion *region = &r_dev->v_addrs[region_num]; - - region->e_size = size; - memory_region_init(®ion->container, OBJECT(pci_dev), - "assigned-dev-container", size); - memory_region_init_io(®ion->real_iomem, OBJECT(pci_dev), - &assigned_dev_ioport_ops, r_dev->v_addrs + region_num, - "assigned-dev-iomem", size); - memory_region_add_subregion(®ion->container, 0, ®ion->real_iomem); -} - -static uint32_t 
assigned_dev_pci_read(PCIDevice *d, int pos, int len) -{ - AssignedDevice *pci_dev = PCI_ASSIGN(d); - uint32_t val; - ssize_t ret; - int fd = pci_dev->real_device.config_fd; - -again: - ret = pread(fd, &val, len, pos); - if (ret != len) { - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { - goto again; - } - - hw_error("pci read failed, ret = %zd errno = %d\n", ret, errno); - } - - return val; -} - -static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) -{ - return (uint8_t)assigned_dev_pci_read(d, pos, 1); -} - -static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) -{ - AssignedDevice *pci_dev = PCI_ASSIGN(d); - ssize_t ret; - int fd = pci_dev->real_device.config_fd; - -again: - ret = pwrite(fd, &val, len, pos); - if (ret != len) { - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { - goto again; - } - - hw_error("pci write failed, ret = %zd errno = %d\n", ret, errno); - } -} - -static void assigned_dev_emulate_config_read(AssignedDevice *dev, - uint32_t offset, uint32_t len) -{ - memset(dev->emulate_config_read + offset, 0xff, len); -} - -static void assigned_dev_direct_config_read(AssignedDevice *dev, - uint32_t offset, uint32_t len) -{ - memset(dev->emulate_config_read + offset, 0, len); -} - -static void assigned_dev_direct_config_write(AssignedDevice *dev, - uint32_t offset, uint32_t len) -{ - memset(dev->emulate_config_write + offset, 0, len); -} - -static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) -{ - int id; - int max_cap = 48; - int pos = start ? 
start : PCI_CAPABILITY_LIST; - int status; - - status = assigned_dev_pci_read_byte(d, PCI_STATUS); - if ((status & PCI_STATUS_CAP_LIST) == 0) { - return 0; - } - - while (max_cap--) { - pos = assigned_dev_pci_read_byte(d, pos); - if (pos < 0x40) { - break; - } - - pos &= ~3; - id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID); - - if (id == 0xff) { - break; - } - if (id == cap) { - return pos; - } - - pos += PCI_CAP_LIST_NEXT; - } - return 0; -} - -static void assigned_dev_register_regions(PCIRegion *io_regions, - unsigned long regions_num, - AssignedDevice *pci_dev, - Error **errp) -{ - uint32_t i; - PCIRegion *cur_region = io_regions; - - for (i = 0; i < regions_num; i++, cur_region++) { - if (!cur_region->valid) { - continue; - } - - /* handle memory io regions */ - if (cur_region->type & IORESOURCE_MEM) { - int t = PCI_BASE_ADDRESS_SPACE_MEMORY; - if (cur_region->type & IORESOURCE_PREFETCH) { - t |= PCI_BASE_ADDRESS_MEM_PREFETCH; - } - if (cur_region->type & IORESOURCE_MEM_64) { - t |= PCI_BASE_ADDRESS_MEM_TYPE_64; - } - - /* map physical memory */ - pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size, - PROT_WRITE | PROT_READ, - MAP_SHARED, - cur_region->resource_fd, - (off_t)0); - - if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) { - pci_dev->v_addrs[i].u.r_virtbase = NULL; - error_setg_errno(errp, errno, "Couldn't mmap 0x%" PRIx64 "!", - cur_region->base_addr); - return; - } - - pci_dev->v_addrs[i].r_size = cur_region->size; - pci_dev->v_addrs[i].e_size = 0; - - /* add offset */ - pci_dev->v_addrs[i].u.r_virtbase += - (cur_region->base_addr & 0xFFF); - - if (cur_region->size & 0xFFF) { - error_report("PCI region %d at address 0x%" PRIx64 " has " - "size 0x%" PRIx64 ", which is not a multiple of " - "4K. 
You might experience some performance hit " - "due to that.", - i, cur_region->base_addr, cur_region->size); - memory_region_init_io(&pci_dev->v_addrs[i].real_iomem, - OBJECT(pci_dev), &slow_bar_ops, - &pci_dev->v_addrs[i], - "assigned-dev-slow-bar", - cur_region->size); - } else { - void *virtbase = pci_dev->v_addrs[i].u.r_virtbase; - char name[32]; - snprintf(name, sizeof(name), "%s.bar%d", - object_get_typename(OBJECT(pci_dev)), i); - memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem, - OBJECT(pci_dev), name, - cur_region->size, virtbase); - vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem, - &pci_dev->dev.qdev); - } - - assigned_dev_iomem_setup(&pci_dev->dev, i, cur_region->size); - pci_register_bar((PCIDevice *) pci_dev, i, t, - &pci_dev->v_addrs[i].container); - continue; - } else { - /* handle port io regions */ - uint32_t val; - int ret; - - /* Test kernel support for ioport resource read/write. Old - * kernels return EIO. New kernels only allow 1/2/4 byte reads - * so should return EINVAL for a 3 byte read */ - ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0); - if (ret >= 0) { - error_report("Unexpected return from I/O port read: %d", ret); - abort(); - } else if (errno != EINVAL) { - error_report("Kernel doesn't support ioport resource " - "access, hiding this region."); - close(pci_dev->v_addrs[i].region->resource_fd); - cur_region->valid = 0; - continue; - } - - pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; - pci_dev->v_addrs[i].r_size = cur_region->size; - pci_dev->v_addrs[i].e_size = 0; - - assigned_dev_ioport_setup(&pci_dev->dev, i, cur_region->size); - pci_register_bar((PCIDevice *) pci_dev, i, - PCI_BASE_ADDRESS_SPACE_IO, - &pci_dev->v_addrs[i].container); - } - } - - /* success */ -} - -static void get_real_id(const char *devpath, const char *idname, uint16_t *val, - Error **errp) -{ - FILE *f; - char name[128]; - long id; - - snprintf(name, sizeof(name), "%s%s", devpath, idname); - f = fopen(name, "r"); - 
if (f == NULL) { - error_setg_file_open(errp, errno, name); - return; - } - if (fscanf(f, "%li\n", &id) == 1) { - *val = id; - } else { - error_setg(errp, "Failed to parse contents of '%s'", name); - } - fclose(f); -} - -static void get_real_vendor_id(const char *devpath, uint16_t *val, - Error **errp) -{ - get_real_id(devpath, "vendor", val, errp); -} - -static void get_real_device_id(const char *devpath, uint16_t *val, - Error **errp) -{ - get_real_id(devpath, "device", val, errp); -} - -static void get_real_device(AssignedDevice *pci_dev, Error **errp) -{ - char dir[128], name[128]; - int fd, r = 0; - FILE *f; - uint64_t start, end, size, flags; - uint16_t id; - PCIRegion *rp; - PCIDevRegions *dev = &pci_dev->real_device; - Error *local_err = NULL; - - dev->region_number = 0; - - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", - pci_dev->host.domain, pci_dev->host.bus, - pci_dev->host.slot, pci_dev->host.function); - - snprintf(name, sizeof(name), "%sconfig", dir); - - if (pci_dev->configfd_name && *pci_dev->configfd_name) { - dev->config_fd = monitor_fd_param(cur_mon, pci_dev->configfd_name, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } else { - dev->config_fd = open(name, O_RDWR); - - if (dev->config_fd == -1) { - error_setg_file_open(errp, errno, name); - return; - } - } -again: - r = read(dev->config_fd, pci_dev->dev.config, - pci_config_size(&pci_dev->dev)); - if (r < 0) { - if (errno == EINTR || errno == EAGAIN) { - goto again; - } - error_setg_errno(errp, errno, "read(\"%s\")", - (pci_dev->configfd_name && *pci_dev->configfd_name) ? 
- pci_dev->configfd_name : name); - return; - } - - /* Restore or clear multifunction, this is always controlled by qemu */ - if (pci_dev->dev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { - pci_dev->dev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; - } else { - pci_dev->dev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; - } - - /* Clear host resource mapping info. If we choose not to register a - * BAR, such as might be the case with the option ROM, we can get - * confusing, unwritable, residual addresses from the host here. */ - memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24); - memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4); - - snprintf(name, sizeof(name), "%sresource", dir); - - f = fopen(name, "r"); - if (f == NULL) { - error_setg_file_open(errp, errno, name); - return; - } - - for (r = 0; r < PCI_ROM_SLOT; r++) { - if (fscanf(f, "%" SCNi64 " %" SCNi64 " %" SCNi64 "\n", - &start, &end, &flags) != 3) { - break; - } - - rp = dev->regions + r; - rp->valid = 0; - rp->resource_fd = -1; - size = end - start + 1; - flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH - | IORESOURCE_MEM_64; - if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) { - continue; - } - if (flags & IORESOURCE_MEM) { - flags &= ~IORESOURCE_IO; - } else { - flags &= ~IORESOURCE_PREFETCH; - } - snprintf(name, sizeof(name), "%sresource%d", dir, r); - fd = open(name, O_RDWR); - if (fd == -1) { - continue; - } - rp->resource_fd = fd; - - rp->type = flags; - rp->valid = 1; - rp->base_addr = start; - rp->size = size; - pci_dev->v_addrs[r].region = rp; - DEBUG("region %d size %" PRIu64 " start 0x%" PRIx64 - " type %d resource_fd %d\n", - r, rp->size, start, rp->type, rp->resource_fd); - } - - fclose(f); - - /* read and fill vendor ID */ - get_real_vendor_id(dir, &id, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - pci_dev->dev.config[0] = id & 0xff; - pci_dev->dev.config[1] = (id & 0xff00) >> 8; - - /* read and fill 
device ID */ - get_real_device_id(dir, &id, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - pci_dev->dev.config[2] = id & 0xff; - pci_dev->dev.config[3] = (id & 0xff00) >> 8; - - pci_word_test_and_clear_mask(pci_dev->emulate_config_write + PCI_COMMAND, - PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE); - - dev->region_number = r; -} - -static void free_msi_virqs(AssignedDevice *dev) -{ - int i; - - for (i = 0; i < dev->msi_virq_nr; i++) { - if (dev->msi_virq[i] >= 0) { - kvm_irqchip_release_virq(kvm_state, dev->msi_virq[i]); - dev->msi_virq[i] = -1; - } - } - g_free(dev->msi_virq); - dev->msi_virq = NULL; - dev->msi_virq_nr = 0; -} - -static void free_assigned_device(AssignedDevice *dev) -{ - int i; - - if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { - assigned_dev_unregister_msix_mmio(dev); - } - for (i = 0; i < dev->real_device.region_number; i++) { - PCIRegion *pci_region = &dev->real_device.regions[i]; - AssignedDevRegion *region = &dev->v_addrs[i]; - - if (!pci_region->valid) { - continue; - } - if (pci_region->type & IORESOURCE_IO) { - if (region->u.r_baseport) { - memory_region_del_subregion(®ion->container, - ®ion->real_iomem); - } - } else if (pci_region->type & IORESOURCE_MEM) { - if (region->u.r_virtbase) { - memory_region_del_subregion(®ion->container, - ®ion->real_iomem); - - /* Remove MSI-X table subregion */ - if (pci_region->base_addr <= dev->msix_table_addr && - pci_region->base_addr + pci_region->size > - dev->msix_table_addr) { - memory_region_del_subregion(®ion->container, - &dev->mmio); - } - if (munmap(region->u.r_virtbase, - (pci_region->size + 0xFFF) & 0xFFFFF000)) { - error_report("Failed to unmap assigned device region: %s", - strerror(errno)); - } - } - } - if (pci_region->resource_fd >= 0) { - close(pci_region->resource_fd); - } - } - - if (dev->real_device.config_fd >= 0) { - close(dev->real_device.config_fd); - } - - free_msi_virqs(dev); -} - -/* This function tries to determine the cause of the 
PCI assignment failure. It - * always returns the cause as a dynamically allocated, human readable string. - * If the function fails to determine the cause for any internal reason, then - * the returned string will state that fact. - */ -static char *assign_failed_examine(const AssignedDevice *dev) -{ - char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; - uint16_t vendor_id, device_id; - int r; - Error *local_err = NULL; - - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", - dev->host.domain, dev->host.bus, dev->host.slot, - dev->host.function); - - snprintf(name, sizeof(name), "%sdriver", dir); - - r = readlink(name, driver, sizeof(driver)); - if ((r <= 0) || r >= sizeof(driver)) { - goto fail; - } - - driver[r] = 0; - ns = strrchr(driver, '/'); - if (!ns) { - goto fail; - } - - ns++; - - if ((get_real_vendor_id(dir, &vendor_id, &local_err), local_err) || - (get_real_device_id(dir, &device_id, &local_err), local_err)) { - /* We're already analyzing an assignment error, so we suppress this - * one just like the others above. 
- */ - error_free(local_err); - goto fail; - } - - return g_strdup_printf( - "*** The driver '%s' is occupying your device %04x:%02x:%02x.%x.\n" - "***\n" - "*** You can try the following commands to free it:\n" - "***\n" - "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/new_id\n" - "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/%s/unbind\n" - "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" - "pci-stub/bind\n" - "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/remove_id\n" - "***\n", - ns, dev->host.domain, dev->host.bus, dev->host.slot, - dev->host.function, vendor_id, device_id, - dev->host.domain, dev->host.bus, dev->host.slot, dev->host.function, - ns, dev->host.domain, dev->host.bus, dev->host.slot, - dev->host.function, vendor_id, device_id); - -fail: - return g_strdup("Couldn't find out why.\n"); -} - -static void assign_device(AssignedDevice *dev, Error **errp) -{ - uint32_t flags = KVM_DEV_ASSIGN_ENABLE_IOMMU; - int r; - - /* Only pass non-zero PCI segment to capable module */ - if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && - dev->host.domain) { - error_setg(errp, "Can't assign device inside non-zero PCI segment " - "as this KVM module doesn't support it."); - return; - } - - if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) { - error_setg(errp, "No IOMMU found. 
Unable to assign device \"%s\"", - dev->dev.qdev.id); - return; - } - - if (dev->features & ASSIGNED_DEVICE_SHARE_INTX_MASK && - kvm_has_intx_set_mask()) { - flags |= KVM_DEV_ASSIGN_PCI_2_3; - } - - r = kvm_device_pci_assign(kvm_state, &dev->host, flags, &dev->dev_id); - if (r < 0) { - switch (r) { - case -EBUSY: { - char *cause; - - cause = assign_failed_examine(dev); - error_setg_errno(errp, -r, "Failed to assign device \"%s\"", - dev->dev.qdev.id); - error_append_hint(errp, "%s", cause); - g_free(cause); - break; - } - default: - error_setg_errno(errp, -r, "Failed to assign device \"%s\"", - dev->dev.qdev.id); - break; - } - } -} - -static void verify_irqchip_in_kernel(Error **errp) -{ - if (kvm_irqchip_in_kernel()) { - return; - } - error_setg(errp, "pci-assign requires KVM with in-kernel irqchip enabled"); -} - -static int assign_intx(AssignedDevice *dev, Error **errp) -{ - AssignedIRQType new_type; - PCIINTxRoute intx_route; - bool intx_host_msi; - int r; - Error *local_err = NULL; - - /* Interrupt PIN 0 means don't use INTx */ - if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) { - pci_device_set_intx_routing_notifier(&dev->dev, NULL); - return 0; - } - - verify_irqchip_in_kernel(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return -ENOTSUP; - } - - pci_device_set_intx_routing_notifier(&dev->dev, - assigned_dev_update_irq_routing); - - intx_route = pci_device_route_intx_to_irq(&dev->dev, dev->intpin); - assert(intx_route.mode != PCI_INTX_INVERTED); - - if (!pci_intx_route_changed(&dev->intx_route, &intx_route)) { - return 0; - } - - switch (dev->assigned_irq_type) { - case ASSIGNED_IRQ_INTX_HOST_INTX: - case ASSIGNED_IRQ_INTX_HOST_MSI: - intx_host_msi = dev->assigned_irq_type == ASSIGNED_IRQ_INTX_HOST_MSI; - r = kvm_device_intx_deassign(kvm_state, dev->dev_id, intx_host_msi); - break; - case ASSIGNED_IRQ_MSI: - r = kvm_device_msi_deassign(kvm_state, dev->dev_id); - break; - case ASSIGNED_IRQ_MSIX: - r = 
kvm_device_msix_deassign(kvm_state, dev->dev_id); - break; - default: - r = 0; - break; - } - if (r) { - perror("assign_intx: deassignment of previous interrupt failed"); - } - dev->assigned_irq_type = ASSIGNED_IRQ_NONE; - - if (intx_route.mode == PCI_INTX_DISABLED) { - dev->intx_route = intx_route; - return 0; - } - -retry: - if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK && - dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { - intx_host_msi = true; - new_type = ASSIGNED_IRQ_INTX_HOST_MSI; - } else { - intx_host_msi = false; - new_type = ASSIGNED_IRQ_INTX_HOST_INTX; - } - - r = kvm_device_intx_assign(kvm_state, dev->dev_id, intx_host_msi, - intx_route.irq); - if (r < 0) { - if (r == -EIO && !(dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK) && - dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { - /* Retry with host-side MSI. There might be an IRQ conflict and - * either the kernel or the device doesn't support sharing. */ - error_report("Host-side INTx sharing not supported, " - "using MSI instead"); - error_printf("Some devices do not work properly in this mode.\n"); - dev->features |= ASSIGNED_DEVICE_PREFER_MSI_MASK; - goto retry; - } - error_setg_errno(errp, -r, "Failed to assign irq for \"%s\"", - dev->dev.qdev.id); - error_append_hint(errp, "Perhaps you are assigning a device " - "that shares an IRQ with another device?\n"); - return r; - } - - dev->intx_route = intx_route; - dev->assigned_irq_type = new_type; - return r; -} - -static void deassign_device(AssignedDevice *dev) -{ - int r; - - r = kvm_device_pci_deassign(kvm_state, dev->dev_id); - assert(r == 0); -} - -/* The pci config space got updated. 
Check if irq numbers have changed - * for our devices - */ -static void assigned_dev_update_irq_routing(PCIDevice *dev) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(dev); - Error *err = NULL; - int r; - - r = assign_intx(assigned_dev, &err); - if (r < 0) { - error_report_err(err); - err = NULL; - qdev_unplug(&dev->qdev, &err); - assert(!err); - } -} - -static void assigned_dev_update_msi(PCIDevice *pci_dev) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); - uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + - PCI_MSI_FLAGS); - int r; - - /* Some guests gratuitously disable MSI even if they're not using it, - * try to catch this by only deassigning irqs if the guest is using - * MSI or intends to start. */ - if (assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSI || - (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { - r = kvm_device_msi_deassign(kvm_state, assigned_dev->dev_id); - /* -ENXIO means no assigned irq */ - if (r && r != -ENXIO) { - perror("assigned_dev_update_msi: deassign irq"); - } - - free_msi_virqs(assigned_dev); - - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; - pci_device_set_intx_routing_notifier(pci_dev, NULL); - } - - if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { - MSIMessage msg = msi_get_message(pci_dev, 0); - int virq; - - virq = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); - if (virq < 0) { - perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route"); - return; - } - - assigned_dev->msi_virq = g_malloc(sizeof(*assigned_dev->msi_virq)); - assigned_dev->msi_virq_nr = 1; - assigned_dev->msi_virq[0] = virq; - if (kvm_device_msi_assign(kvm_state, assigned_dev->dev_id, virq) < 0) { - perror("assigned_dev_update_msi: kvm_device_msi_assign"); - } - - assigned_dev->intx_route.mode = PCI_INTX_DISABLED; - assigned_dev->intx_route.irq = -1; - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI; - } else { - Error *local_err = NULL; - - assign_intx(assigned_dev, &local_err); - if (local_err) { - error_report_err(local_err); - } 
- } -} - -static void assigned_dev_update_msi_msg(PCIDevice *pci_dev) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); - uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + - PCI_MSI_FLAGS); - - if (assigned_dev->assigned_irq_type != ASSIGNED_IRQ_MSI || - !(ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { - return; - } - - kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0], - msi_get_message(pci_dev, 0), pci_dev); -} - -static bool assigned_dev_msix_masked(MSIXTableEntry *entry) -{ - return (entry->ctrl & cpu_to_le32(0x1)) != 0; -} - -/* - * When MSI-X is first enabled the vector table typically has all the - * vectors masked, so we can't use that as the obvious test to figure out - * how many vectors to initially enable. Instead we look at the data field - * because this is what worked for pci-assign for a long time. This makes - * sure the physical MSI-X state tracks the guest's view, which is important - * for some VF/PF and PF/fw communication channels. - */ -static bool assigned_dev_msix_skipped(MSIXTableEntry *entry) -{ - return !entry->data; -} - -static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) -{ - AssignedDevice *adev = PCI_ASSIGN(pci_dev); - uint16_t entries_nr = 0; - int i, r = 0; - MSIXTableEntry *entry = adev->msix_table; - MSIMessage msg; - - /* Get the usable entry number for allocating */ - for (i = 0; i < adev->msix_max; i++, entry++) { - if (assigned_dev_msix_skipped(entry)) { - continue; - } - entries_nr++; - } - - DEBUG("MSI-X entries: %d\n", entries_nr); - - /* It's valid to enable MSI-X with all entries masked */ - if (!entries_nr) { - return 0; - } - - r = kvm_device_msix_init_vectors(kvm_state, adev->dev_id, entries_nr); - if (r != 0) { - error_report("fail to set MSI-X entry number for MSIX! 
%s", - strerror(-r)); - return r; - } - - free_msi_virqs(adev); - - adev->msi_virq_nr = adev->msix_max; - adev->msi_virq = g_malloc(adev->msix_max * sizeof(*adev->msi_virq)); - - entry = adev->msix_table; - for (i = 0; i < adev->msix_max; i++, entry++) { - adev->msi_virq[i] = -1; - - if (assigned_dev_msix_skipped(entry)) { - continue; - } - - msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32); - msg.data = entry->data; - r = kvm_irqchip_add_msi_route(kvm_state, msg, pci_dev); - if (r < 0) { - return r; - } - adev->msi_virq[i] = r; - - DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i, - r, entry->addr_hi, entry->addr_lo, entry->data); - - r = kvm_device_msix_set_vector(kvm_state, adev->dev_id, i, - adev->msi_virq[i]); - if (r) { - error_report("fail to set MSI-X entry! %s", strerror(-r)); - break; - } - } - - return r; -} - -static void assigned_dev_update_msix(PCIDevice *pci_dev) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); - uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap + - PCI_MSIX_FLAGS); - int r; - - /* Some guests gratuitously disable MSIX even if they're not using it, - * try to catch this by only deassigning irqs if the guest is using - * MSIX or intends to start. 
*/ - if ((assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSIX) || - (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) { - r = kvm_device_msix_deassign(kvm_state, assigned_dev->dev_id); - /* -ENXIO means no assigned irq */ - if (r && r != -ENXIO) { - perror("assigned_dev_update_msix: deassign irq"); - } - - free_msi_virqs(assigned_dev); - - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; - pci_device_set_intx_routing_notifier(pci_dev, NULL); - } - - if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) { - if (assigned_dev_update_msix_mmio(pci_dev) < 0) { - perror("assigned_dev_update_msix_mmio"); - return; - } - - if (assigned_dev->msi_virq_nr > 0) { - if (kvm_device_msix_assign(kvm_state, assigned_dev->dev_id) < 0) { - perror("assigned_dev_enable_msix: assign irq"); - return; - } - } - assigned_dev->intx_route.mode = PCI_INTX_DISABLED; - assigned_dev->intx_route.irq = -1; - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSIX; - } else { - Error *local_err = NULL; - - assign_intx(assigned_dev, &local_err); - if (local_err) { - error_report_err(local_err); - } - } -} - -static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, - uint32_t address, int len) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); - uint32_t virt_val = pci_default_read_config(pci_dev, address, len); - uint32_t real_val, emulate_mask, full_emulation_mask; - - emulate_mask = 0; - memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); - emulate_mask = le32_to_cpu(emulate_mask); - - full_emulation_mask = 0xffffffff >> (32 - len * 8); - - if (emulate_mask != full_emulation_mask) { - real_val = assigned_dev_pci_read(pci_dev, address, len); - return (virt_val & emulate_mask) | (real_val & ~emulate_mask); - } else { - return virt_val; - } -} - -static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, - uint32_t val, int len) -{ - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); - uint16_t old_cmd = pci_get_word(pci_dev->config + PCI_COMMAND); - uint32_t 
emulate_mask, full_emulation_mask; - int ret; - - pci_default_write_config(pci_dev, address, val, len); - - if (kvm_has_intx_set_mask() && - range_covers_byte(address, len, PCI_COMMAND + 1)) { - bool intx_masked = (pci_get_word(pci_dev->config + PCI_COMMAND) & - PCI_COMMAND_INTX_DISABLE); - - if (intx_masked != !!(old_cmd & PCI_COMMAND_INTX_DISABLE)) { - ret = kvm_device_intx_set_mask(kvm_state, assigned_dev->dev_id, - intx_masked); - if (ret) { - perror("assigned_dev_pci_write_config: set intx mask"); - } - } - } - if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { - if (range_covers_byte(address, len, - pci_dev->msi_cap + PCI_MSI_FLAGS)) { - assigned_dev_update_msi(pci_dev); - } else if (ranges_overlap(address, len, /* 32bit MSI only */ - pci_dev->msi_cap + PCI_MSI_ADDRESS_LO, 6)) { - assigned_dev_update_msi_msg(pci_dev); - } - } - if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { - if (range_covers_byte(address, len, - pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { - assigned_dev_update_msix(pci_dev); - } - } - - emulate_mask = 0; - memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len); - emulate_mask = le32_to_cpu(emulate_mask); - - full_emulation_mask = 0xffffffff >> (32 - len * 8); - - if (emulate_mask != full_emulation_mask) { - if (emulate_mask) { - val &= ~emulate_mask; - val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask; - } - assigned_dev_pci_write(pci_dev, address, val, len); - } -} - -static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset, - uint32_t len) -{ - assigned_dev_direct_config_read(dev, offset, len); - assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1); -} - -static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) -{ - AssignedDevice *dev = PCI_ASSIGN(pci_dev); - PCIRegion *pci_region = dev->real_device.regions; - int ret, pos; - Error *local_err = NULL; - - /* Clear initial capabilities pointer and status copied from hw */ - 
pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0); - pci_set_word(pci_dev->config + PCI_STATUS, - pci_get_word(pci_dev->config + PCI_STATUS) & - ~PCI_STATUS_CAP_LIST); - - /* Expose MSI capability - * MSI capability is the 1st capability in capability config */ - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0); - if (pos != 0 && kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) { - verify_irqchip_in_kernel(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return -ENOTSUP; - } - dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; - /* Only 32-bit/no-mask currently supported */ - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSI, pos, 10, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - pci_dev->msi_cap = pos; - - pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS, - pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) & - PCI_MSI_FLAGS_QMASK); - pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0); - pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0); - - /* Set writable fields */ - pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS, - PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); - pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc); - pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff); - } - /* Expose MSI-X capability */ - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0); - if (pos != 0 && kvm_device_msix_supported(kvm_state)) { - int bar_nr; - uint32_t msix_table_entry; - uint16_t msix_max; - - verify_irqchip_in_kernel(&local_err); - if (local_err) { - error_propagate(errp, local_err); - return -ENOTSUP; - } - dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSIX, pos, 12, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - pci_dev->msix_cap = pos; - - msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) & - PCI_MSIX_FLAGS_QSIZE) + 1; - msix_max = 
MIN(msix_max, KVM_MAX_MSIX_PER_DEV); - pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1); - - /* Only enable and function mask bits are writable */ - pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS, - PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); - - msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE); - bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK; - msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK; - dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; - dev->msix_max = msix_max; - } - - /* Minimal PM support, nothing writable, device appears to NAK changes */ - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0); - if (pos) { - uint16_t pmc; - - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - - assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF); - - pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); - pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); - pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); - - /* assign_device will bring the device up to D0, so we don't need - * to worry about doing that ourselves here. 
*/ - pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, - PCI_PM_CTRL_NO_SOFT_RESET); - - pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); - pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); - } - - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0); - if (pos) { - uint8_t version, size = 0; - uint16_t type, devctl, lnksta; - uint32_t devcap, lnkcap; - - version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS); - version &= PCI_EXP_FLAGS_VERS; - if (version == 1) { - size = 0x14; - } else if (version == 2) { - /* - * Check for non-std size, accept reduced size to 0x34, - * which is what bcm5761 implemented, violating the - * PCIe v3.0 spec that regs should exist and be read as 0, - * not optionally provided and shorten the struct size. - */ - size = MIN(0x3c, PCI_CONFIG_SPACE_SIZE - pos); - if (size < 0x34) { - error_setg(errp, "Invalid size PCIe cap-id 0x%x", - PCI_CAP_ID_EXP); - return -EINVAL; - } else if (size != 0x3c) { - error_report("WARNING, %s: PCIe cap-id 0x%x has " - "non-standard size 0x%x; std size should be 0x3c", - __func__, PCI_CAP_ID_EXP, size); - } - } else if (version == 0) { - uint16_t vid, did; - vid = pci_get_word(pci_dev->config + PCI_VENDOR_ID); - did = pci_get_word(pci_dev->config + PCI_DEVICE_ID); - if (vid == PCI_VENDOR_ID_INTEL && did == 0x10ed) { - /* - * quirk for Intel 82599 VF with invalid PCIe capability - * version, should really be version 2 (same as PF) - */ - size = 0x3c; - } - } - - if (size == 0) { - error_setg(errp, "Unsupported PCI express capability version %d", - version); - return -EINVAL; - } - - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_EXP, pos, size, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - - assigned_dev_setup_cap_read(dev, pos, size); - - type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); - type = (type & PCI_EXP_FLAGS_TYPE) >> 4; - if (type != PCI_EXP_TYPE_ENDPOINT && - type != PCI_EXP_TYPE_LEG_END && type != 
PCI_EXP_TYPE_RC_END) { - error_setg(errp, "Device assignment only supports endpoint " - "assignment, device type %d", type); - return -EINVAL; - } - - /* capabilities, pass existing read-only copy - * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ - - /* device capabilities: hide FLR */ - devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); - devcap &= ~PCI_EXP_DEVCAP_FLR; - pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); - - /* device control: clear all error reporting enable bits, leaving - * only a few host values. Note, these are - * all writable, but not passed to hw. - */ - devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); - devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | - PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; - pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); - devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; - pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); - - /* Clear device status */ - pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); - - /* Link capabilities, expose links and latencues, clear reporting */ - lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP); - lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | - PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | - PCI_EXP_LNKCAP_L1EL); - pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); - - /* Link control, pass existing read-only copy. Should be writable? 
*/ - - /* Link status, only expose current speed and width */ - lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); - lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); - pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); - - if (version >= 2) { - /* Slot capabilities, control, status - not needed for endpoints */ - pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); - pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); - pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); - - /* Root control, capabilities, status - not needed for endpoints */ - pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); - pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); - pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); - - /* Device capabilities/control 2, pass existing read-only copy */ - /* Link control 2, pass existing read-only copy */ - } - } - - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0); - if (pos) { - uint16_t cmd; - uint32_t status; - - /* Only expose the minimum, 8 byte capability */ - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PCIX, pos, 8, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - - assigned_dev_setup_cap_read(dev, pos, 8); - - /* Command register, clear upper bits, including extended modes */ - cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); - cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | - PCI_X_CMD_MAX_SPLIT); - pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); - - /* Status register, update with emulated PCI bus location, clear - * error bits, leave the rest. 
*/ - status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); - status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); - status |= pci_requester_id(pci_dev); - status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | - PCI_X_STATUS_SPL_ERR); - pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); - } - - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0); - if (pos) { - /* Direct R/W passthrough */ - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VPD, pos, 8, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - - assigned_dev_setup_cap_read(dev, pos, 8); - - /* direct write for cap content */ - assigned_dev_direct_config_write(dev, pos + 2, 6); - } - - /* Devices can have multiple vendor capabilities, get them all */ - for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); - pos += PCI_CAP_LIST_NEXT) { - uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); - /* Direct R/W passthrough */ - ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VNDR, pos, len, - &local_err); - if (ret < 0) { - error_propagate(errp, local_err); - return ret; - } - - assigned_dev_setup_cap_read(dev, pos, len); - - /* direct write for cap content */ - assigned_dev_direct_config_write(dev, pos + 2, len - 2); - } - - /* If real and virtual capability list status bits differ, virtualize the - * access. 
*/ - if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != - (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & - PCI_STATUS_CAP_LIST)) { - dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST; - } - - return 0; -} - -static uint64_t -assigned_dev_msix_mmio_read(void *opaque, hwaddr addr, - unsigned size) -{ - AssignedDevice *adev = opaque; - uint64_t val; - - memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size); - - return val; -} - -static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - AssignedDevice *adev = opaque; - PCIDevice *pdev = &adev->dev; - uint16_t ctrl; - MSIXTableEntry orig; - int i = addr >> 4; - - if (i >= adev->msix_max) { - return; /* Drop write */ - } - - ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS); - - DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val); - - if (ctrl & PCI_MSIX_FLAGS_ENABLE) { - orig = adev->msix_table[i]; - } - - memcpy((uint8_t *)adev->msix_table + addr, &val, size); - - if (ctrl & PCI_MSIX_FLAGS_ENABLE) { - MSIXTableEntry *entry = &adev->msix_table[i]; - - if (!assigned_dev_msix_masked(&orig) && - assigned_dev_msix_masked(entry)) { - /* - * Vector masked, disable it - * - * XXX It's not clear if we can or should actually attempt - * to mask or disable the interrupt. KVM doesn't have - * support for pending bits and kvm_assign_set_msix_entry - * doesn't modify the device hardware mask. Interrupts - * while masked are simply not injected to the guest, so - * are lost. Can we get away with always injecting an - * interrupt on unmask? 
- */ - } else if (assigned_dev_msix_masked(&orig) && - !assigned_dev_msix_masked(entry)) { - /* Vector unmasked */ - if (i >= adev->msi_virq_nr || adev->msi_virq[i] < 0) { - /* Previously unassigned vector, start from scratch */ - assigned_dev_update_msix(pdev); - return; - } else { - /* Update an existing, previously masked vector */ - MSIMessage msg; - int ret; - - msg.address = entry->addr_lo | - ((uint64_t)entry->addr_hi << 32); - msg.data = entry->data; - - ret = kvm_irqchip_update_msi_route(kvm_state, - adev->msi_virq[i], msg, - pdev); - if (ret) { - error_report("Error updating irq routing entry (%d)", ret); - } - } - } - } -} - -static const MemoryRegionOps assigned_dev_msix_mmio_ops = { - .read = assigned_dev_msix_mmio_read, - .write = assigned_dev_msix_mmio_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 8, - }, - .impl = { - .min_access_size = 4, - .max_access_size = 8, - }, -}; - -static void assigned_dev_msix_reset(AssignedDevice *dev) -{ - MSIXTableEntry *entry; - int i; - - if (!dev->msix_table) { - return; - } - - memset(dev->msix_table, 0, MSIX_PAGE_SIZE); - - for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) { - entry->ctrl = cpu_to_le32(0x1); /* Masked */ - } -} - -static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) -{ - dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); - if (dev->msix_table == MAP_FAILED) { - error_setg_errno(errp, errno, "failed to allocate msix_table"); - dev->msix_table = NULL; - return; - } - - assigned_dev_msix_reset(dev); - - memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops, - dev, "assigned-dev-msix", MSIX_PAGE_SIZE); -} - -static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) -{ - if (!dev->msix_table) { - return; - } - - if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) { - error_report("error unmapping msix_table! 
%s", strerror(errno)); - } - dev->msix_table = NULL; -} - -static const VMStateDescription vmstate_assigned_device = { - .name = "pci-assign", - .unmigratable = 1, -}; - -static void reset_assigned_device(DeviceState *dev) -{ - PCIDevice *pci_dev = PCI_DEVICE(dev); - AssignedDevice *adev = PCI_ASSIGN(pci_dev); - char reset_file[64]; - const char reset[] = "1"; - int fd, ret; - - /* - * If a guest is reset without being shutdown, MSI/MSI-X can still - * be running. We want to return the device to a known state on - * reset, so disable those here. We especially do not want MSI-X - * enabled since it lives in MMIO space, which is about to get - * disabled. - */ - if (adev->assigned_irq_type == ASSIGNED_IRQ_MSIX) { - uint16_t ctrl = pci_get_word(pci_dev->config + - pci_dev->msix_cap + PCI_MSIX_FLAGS); - - pci_set_word(pci_dev->config + pci_dev->msix_cap + PCI_MSIX_FLAGS, - ctrl & ~PCI_MSIX_FLAGS_ENABLE); - assigned_dev_update_msix(pci_dev); - } else if (adev->assigned_irq_type == ASSIGNED_IRQ_MSI) { - uint8_t ctrl = pci_get_byte(pci_dev->config + - pci_dev->msi_cap + PCI_MSI_FLAGS); - - pci_set_byte(pci_dev->config + pci_dev->msi_cap + PCI_MSI_FLAGS, - ctrl & ~PCI_MSI_FLAGS_ENABLE); - assigned_dev_update_msi(pci_dev); - } - - snprintf(reset_file, sizeof(reset_file), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset", - adev->host.domain, adev->host.bus, adev->host.slot, - adev->host.function); - - /* - * Issue a device reset via pci-sysfs. Note that we use write(2) here - * and ignore the return value because some kernels have a bug that - * returns 0 rather than bytes written on success, sending us into an - * infinite retry loop using other write mechanisms. - */ - fd = open(reset_file, O_WRONLY); - if (fd != -1) { - ret = write(fd, reset, strlen(reset)); - (void)ret; - close(fd); - } - - /* - * When a 0 is written to the bus master register, the device is logically - * disconnected from the PCI bus. This avoids further DMA transfers. 
- */ - assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 1); -} - -static void assigned_realize(struct PCIDevice *pci_dev, Error **errp) -{ - AssignedDevice *dev = PCI_ASSIGN(pci_dev); - uint8_t e_intx; - int r; - Error *local_err = NULL; - - if (!kvm_enabled()) { - error_setg(&local_err, "pci-assign requires KVM support"); - goto exit_with_error; - } - - if (!dev->host.domain && !dev->host.bus && !dev->host.slot && - !dev->host.function) { - error_setg(&local_err, "no host device specified"); - goto exit_with_error; - } - - /* - * Set up basic config space access control. Will be further refined during - * device initialization. - */ - assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE); - assigned_dev_direct_config_read(dev, PCI_STATUS, 2); - assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1); - assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3); - assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1); - assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1); - assigned_dev_direct_config_read(dev, PCI_BIST, 1); - assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4); - assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); - assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2); - assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7); - assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1); - assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1); - memcpy(dev->emulate_config_write, dev->emulate_config_read, - sizeof(dev->emulate_config_read)); - - get_real_device(dev, &local_err); - if (local_err) { - goto out; - } - - if (assigned_device_pci_cap_init(pci_dev, &local_err) < 0) { - goto out; - } - - /* intercept MSI-X entry page in the MMIO */ - if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { - assigned_dev_register_msix_mmio(dev, &local_err); - if (local_err) { - goto out; - } - } - - /* handle real device's MMIO/PIO BARs */ - assigned_dev_register_regions(dev->real_device.regions, - 
dev->real_device.region_number, dev, - &local_err); - if (local_err) { - goto out; - } - - /* handle interrupt routing */ - e_intx = dev->dev.config[PCI_INTERRUPT_PIN] - 1; - dev->intpin = e_intx; - dev->intx_route.mode = PCI_INTX_DISABLED; - dev->intx_route.irq = -1; - - /* assign device to guest */ - assign_device(dev, &local_err); - if (local_err) { - goto out; - } - - /* assign legacy INTx to the device */ - r = assign_intx(dev, &local_err); - if (r < 0) { - goto assigned_out; - } - - assigned_dev_load_option_rom(dev); - - return; - -assigned_out: - deassign_device(dev); - -out: - free_assigned_device(dev); - -exit_with_error: - assert(local_err); - error_propagate(errp, local_err); -} - -static void assigned_exitfn(struct PCIDevice *pci_dev) -{ - AssignedDevice *dev = PCI_ASSIGN(pci_dev); - - deassign_device(dev); - free_assigned_device(dev); -} - -static void assigned_dev_instance_init(Object *obj) -{ - PCIDevice *pci_dev = PCI_DEVICE(obj); - AssignedDevice *d = PCI_ASSIGN(pci_dev); - - device_add_bootindex_property(obj, &d->bootindex, - "bootindex", NULL, - &pci_dev->qdev, NULL); -} - -static Property assigned_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", AssignedDevice, host), - DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features, - ASSIGNED_DEVICE_PREFER_MSI_BIT, false), - DEFINE_PROP_BIT("share_intx", AssignedDevice, features, - ASSIGNED_DEVICE_SHARE_INTX_BIT, true), - DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name), - DEFINE_PROP_END_OF_LIST(), -}; - -static void assign_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - k->realize = assigned_realize; - k->exit = assigned_exitfn; - k->config_read = assigned_dev_pci_read_config; - k->config_write = assigned_dev_pci_write_config; - dc->props = assigned_dev_properties; - dc->vmsd = &vmstate_assigned_device; - dc->reset = reset_assigned_device; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); 
- dc->desc = "KVM-based PCI passthrough"; -} - -static const TypeInfo assign_info = { - .name = TYPE_PCI_ASSIGN, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(AssignedDevice), - .class_init = assign_class_init, - .instance_init = assigned_dev_instance_init, -}; - -static void assign_register_types(void) -{ - type_register_static(&assign_info); -} - -type_init(assign_register_types) - -static void assigned_dev_load_option_rom(AssignedDevice *dev) -{ - int size = 0; - - pci_assign_dev_load_option_rom(&dev->dev, OBJECT(dev), &size, - dev->host.domain, dev->host.bus, - dev->host.slot, dev->host.function); - - if (!size) { - error_report("pci-assign: Invalid ROM."); - } -} diff --git a/qemu/hw/i386/kvmvapic.c b/qemu/hw/i386/kvmvapic.c deleted file mode 100644 index c69f37404..000000000 --- a/qemu/hw/i386/kvmvapic.c +++ /dev/null @@ -1,866 +0,0 @@ -/* - * TPR optimization for 32-bit Windows guests (XP and Server 2003) - * - * Copyright (C) 2007-2008 Qumranet Technologies - * Copyright (C) 2012 Jan Kiszka, Siemens AG - * - * This work is licensed under the terms of the GNU GPL version 2, or - * (at your option) any later version. See the COPYING file in the - * top-level directory. 
- */ -#include "qemu/osdep.h" -#include "sysemu/sysemu.h" -#include "sysemu/cpus.h" -#include "sysemu/kvm.h" -#include "hw/i386/apic_internal.h" -#include "hw/sysbus.h" - -#define VAPIC_IO_PORT 0x7e - -#define VAPIC_CPU_SHIFT 7 - -#define ROM_BLOCK_SIZE 512 -#define ROM_BLOCK_MASK (~(ROM_BLOCK_SIZE - 1)) - -typedef enum VAPICMode { - VAPIC_INACTIVE = 0, - VAPIC_ACTIVE = 1, - VAPIC_STANDBY = 2, -} VAPICMode; - -typedef struct VAPICHandlers { - uint32_t set_tpr; - uint32_t set_tpr_eax; - uint32_t get_tpr[8]; - uint32_t get_tpr_stack; -} QEMU_PACKED VAPICHandlers; - -typedef struct GuestROMState { - char signature[8]; - uint32_t vaddr; - uint32_t fixup_start; - uint32_t fixup_end; - uint32_t vapic_vaddr; - uint32_t vapic_size; - uint32_t vcpu_shift; - uint32_t real_tpr_addr; - VAPICHandlers up; - VAPICHandlers mp; -} QEMU_PACKED GuestROMState; - -typedef struct VAPICROMState { - SysBusDevice busdev; - MemoryRegion io; - MemoryRegion rom; - uint32_t state; - uint32_t rom_state_paddr; - uint32_t rom_state_vaddr; - uint32_t vapic_paddr; - uint32_t real_tpr_addr; - GuestROMState rom_state; - size_t rom_size; - bool rom_mapped_writable; - VMChangeStateEntry *vmsentry; -} VAPICROMState; - -#define TYPE_VAPIC "kvmvapic" -#define VAPIC(obj) OBJECT_CHECK(VAPICROMState, (obj), TYPE_VAPIC) - -#define TPR_INSTR_ABS_MODRM 0x1 -#define TPR_INSTR_MATCH_MODRM_REG 0x2 - -typedef struct TPRInstruction { - uint8_t opcode; - uint8_t modrm_reg; - unsigned int flags; - TPRAccess access; - size_t length; - off_t addr_offset; -} TPRInstruction; - -/* must be sorted by length, shortest first */ -static const TPRInstruction tpr_instr[] = { - { /* mov abs to eax */ - .opcode = 0xa1, - .access = TPR_ACCESS_READ, - .length = 5, - .addr_offset = 1, - }, - { /* mov eax to abs */ - .opcode = 0xa3, - .access = TPR_ACCESS_WRITE, - .length = 5, - .addr_offset = 1, - }, - { /* mov r32 to r/m32 */ - .opcode = 0x89, - .flags = TPR_INSTR_ABS_MODRM, - .access = TPR_ACCESS_WRITE, - .length = 6, - 
.addr_offset = 2, - }, - { /* mov r/m32 to r32 */ - .opcode = 0x8b, - .flags = TPR_INSTR_ABS_MODRM, - .access = TPR_ACCESS_READ, - .length = 6, - .addr_offset = 2, - }, - { /* push r/m32 */ - .opcode = 0xff, - .modrm_reg = 6, - .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, - .access = TPR_ACCESS_READ, - .length = 6, - .addr_offset = 2, - }, - { /* mov imm32, r/m32 (c7/0) */ - .opcode = 0xc7, - .modrm_reg = 0, - .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, - .access = TPR_ACCESS_WRITE, - .length = 10, - .addr_offset = 2, - }, -}; - -static void read_guest_rom_state(VAPICROMState *s) -{ - cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state, - sizeof(GuestROMState)); -} - -static void write_guest_rom_state(VAPICROMState *s) -{ - cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state, - sizeof(GuestROMState)); -} - -static void update_guest_rom_state(VAPICROMState *s) -{ - read_guest_rom_state(s); - - s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr); - s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT); - - write_guest_rom_state(s); -} - -static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - hwaddr paddr; - target_ulong addr; - - if (s->state == VAPIC_ACTIVE) { - return 0; - } - /* - * If there is no prior TPR access instruction we could analyze (which is - * the case after resume from hibernation), we need to scan the possible - * virtual address space for the APIC mapping. 
- */ - for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) { - paddr = cpu_get_phys_page_debug(cs, addr); - if (paddr != APIC_DEFAULT_ADDRESS) { - continue; - } - s->real_tpr_addr = addr + 0x80; - update_guest_rom_state(s); - return 0; - } - return -1; -} - -static uint8_t modrm_reg(uint8_t modrm) -{ - return (modrm >> 3) & 7; -} - -static bool is_abs_modrm(uint8_t modrm) -{ - return (modrm & 0xc7) == 0x05; -} - -static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr) -{ - return opcode[0] == instr->opcode && - (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) && - (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) || - modrm_reg(opcode[1]) == instr->modrm_reg); -} - -static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu, - target_ulong *pip, TPRAccess access) -{ - CPUState *cs = CPU(cpu); - const TPRInstruction *instr; - target_ulong ip = *pip; - uint8_t opcode[2]; - uint32_t real_tpr_addr; - int i; - - if ((ip & 0xf0000000ULL) != 0x80000000ULL && - (ip & 0xf0000000ULL) != 0xe0000000ULL) { - return -1; - } - - /* - * Early Windows 2003 SMP initialization contains a - * - * mov imm32, r/m32 - * - * instruction that is patched by TPR optimization. The problem is that - * RSP, used by the patched instruction, is zero, so the guest gets a - * double fault and dies. - */ - if (cpu->env.regs[R_ESP] == 0) { - return -1; - } - - if (kvm_enabled() && !kvm_irqchip_in_kernel()) { - /* - * KVM without kernel-based TPR access reporting will pass an IP that - * points after the accessing instruction. So we need to look backward - * to find the reason. 
- */ - for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { - instr = &tpr_instr[i]; - if (instr->access != access) { - continue; - } - if (cpu_memory_rw_debug(cs, ip - instr->length, opcode, - sizeof(opcode), 0) < 0) { - return -1; - } - if (opcode_matches(opcode, instr)) { - ip -= instr->length; - goto instruction_ok; - } - } - return -1; - } else { - if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) { - return -1; - } - for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { - instr = &tpr_instr[i]; - if (opcode_matches(opcode, instr)) { - goto instruction_ok; - } - } - return -1; - } - -instruction_ok: - /* - * Grab the virtual TPR address from the instruction - * and update the cached values. - */ - if (cpu_memory_rw_debug(cs, ip + instr->addr_offset, - (void *)&real_tpr_addr, - sizeof(real_tpr_addr), 0) < 0) { - return -1; - } - real_tpr_addr = le32_to_cpu(real_tpr_addr); - if ((real_tpr_addr & 0xfff) != 0x80) { - return -1; - } - s->real_tpr_addr = real_tpr_addr; - update_guest_rom_state(s); - - *pip = ip; - return 0; -} - -static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - hwaddr paddr; - uint32_t rom_state_vaddr; - uint32_t pos, patch, offset; - - /* nothing to do if already activated */ - if (s->state == VAPIC_ACTIVE) { - return 0; - } - - /* bail out if ROM init code was not executed (missing ROM?) 
*/ - if (s->state == VAPIC_INACTIVE) { - return -1; - } - - /* find out virtual address of the ROM */ - rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000); - paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr); - if (paddr == -1) { - return -1; - } - paddr += rom_state_vaddr & ~TARGET_PAGE_MASK; - if (paddr != s->rom_state_paddr) { - return -1; - } - read_guest_rom_state(s); - if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) { - return -1; - } - s->rom_state_vaddr = rom_state_vaddr; - - /* fixup addresses in ROM if needed */ - if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) { - return 0; - } - for (pos = le32_to_cpu(s->rom_state.fixup_start); - pos < le32_to_cpu(s->rom_state.fixup_end); - pos += 4) { - cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr, - &offset, sizeof(offset)); - offset = le32_to_cpu(offset); - cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch)); - patch = le32_to_cpu(patch); - patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr); - patch = cpu_to_le32(patch); - cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch)); - } - read_guest_rom_state(s); - s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) - - le32_to_cpu(s->rom_state.vaddr); - - return 0; -} - -/* - * Tries to read the unique processor number from the Kernel Processor Control - * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR - * cannot be accessed or is considered invalid. This also ensures that we are - * not patching the wrong guest. 
- */ -static int get_kpcr_number(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kpcr { - uint8_t fill1[0x1c]; - uint32_t self; - uint8_t fill2[0x31]; - uint8_t number; - } QEMU_PACKED kpcr; - - if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base, - (void *)&kpcr, sizeof(kpcr), 0) < 0 || - kpcr.self != env->segs[R_FS].base) { - return -1; - } - return kpcr.number; -} - -static int vapic_enable(VAPICROMState *s, X86CPU *cpu) -{ - int cpu_number = get_kpcr_number(cpu); - hwaddr vapic_paddr; - static const uint8_t enabled = 1; - - if (cpu_number < 0) { - return -1; - } - vapic_paddr = s->vapic_paddr + - (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT); - cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled), - &enabled, sizeof(enabled)); - apic_enable_vapic(cpu->apic_state, vapic_paddr); - - s->state = VAPIC_ACTIVE; - - return 0; -} - -static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte) -{ - cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1); -} - -static void patch_call(VAPICROMState *s, X86CPU *cpu, target_ulong ip, - uint32_t target) -{ - uint32_t offset; - - offset = cpu_to_le32(target - ip - 5); - patch_byte(cpu, ip, 0xe8); /* call near */ - cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1); -} - -static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - VAPICHandlers *handlers; - uint8_t opcode[2]; - uint32_t imm32; - target_ulong current_pc = 0; - target_ulong current_cs_base = 0; - int current_flags = 0; - - if (smp_cpus == 1) { - handlers = &s->rom_state.up; - } else { - handlers = &s->rom_state.mp; - } - - if (!kvm_enabled()) { - cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, - ¤t_flags); - } - - pause_all_vcpus(); - - cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0); - - switch (opcode[0]) { - case 0x89: /* mov r32 to r/m32 */ - patch_byte(cpu, ip, 0x50 + modrm_reg(opcode[1])); /* push reg */ - patch_call(s, 
cpu, ip + 1, handlers->set_tpr); - break; - case 0x8b: /* mov r/m32 to r32 */ - patch_byte(cpu, ip, 0x90); - patch_call(s, cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]); - break; - case 0xa1: /* mov abs to eax */ - patch_call(s, cpu, ip, handlers->get_tpr[0]); - break; - case 0xa3: /* mov eax to abs */ - patch_call(s, cpu, ip, handlers->set_tpr_eax); - break; - case 0xc7: /* mov imm32, r/m32 (c7/0) */ - patch_byte(cpu, ip, 0x68); /* push imm32 */ - cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0); - cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1); - patch_call(s, cpu, ip + 5, handlers->set_tpr); - break; - case 0xff: /* push r/m32 */ - patch_byte(cpu, ip, 0x50); /* push eax */ - patch_call(s, cpu, ip + 1, handlers->get_tpr_stack); - break; - default: - abort(); - } - - resume_all_vcpus(); - - if (!kvm_enabled()) { - cs->current_tb = NULL; - tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1); - cpu_resume_from_signal(cs, NULL); - } -} - -void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip, - TPRAccess access) -{ - VAPICROMState *s = VAPIC(dev); - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - cpu_synchronize_state(cs); - - if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) { - if (s->state == VAPIC_ACTIVE) { - vapic_enable(s, cpu); - } - return; - } - if (update_rom_mapping(s, env, ip) < 0) { - return; - } - if (vapic_enable(s, cpu) < 0) { - return; - } - patch_instruction(s, cpu, ip); -} - -typedef struct VAPICEnableTPRReporting { - DeviceState *apic; - bool enable; -} VAPICEnableTPRReporting; - -static void vapic_do_enable_tpr_reporting(void *data) -{ - VAPICEnableTPRReporting *info = data; - - apic_enable_tpr_access_reporting(info->apic, info->enable); -} - -static void vapic_enable_tpr_reporting(bool enable) -{ - VAPICEnableTPRReporting info = { - .enable = enable, - }; - CPUState *cs; - X86CPU *cpu; - - CPU_FOREACH(cs) { - cpu = X86_CPU(cs); - info.apic = 
cpu->apic_state; - run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info); - } -} - -static void vapic_reset(DeviceState *dev) -{ - VAPICROMState *s = VAPIC(dev); - - s->state = VAPIC_INACTIVE; - s->rom_state_paddr = 0; - vapic_enable_tpr_reporting(false); -} - -/* - * Set the IRQ polling hypercalls to the supported variant: - * - vmcall if using KVM in-kernel irqchip - * - 32-bit VAPIC port write otherwise - */ -static int patch_hypercalls(VAPICROMState *s) -{ - hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; - static const uint8_t vmcall_pattern[] = { /* vmcall */ - 0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1 - }; - static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */ - 0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e - }; - uint8_t alternates[2]; - const uint8_t *pattern; - const uint8_t *patch; - int patches = 0; - off_t pos; - uint8_t *rom; - - rom = g_malloc(s->rom_size); - cpu_physical_memory_read(rom_paddr, rom, s->rom_size); - - for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) { - if (kvm_irqchip_in_kernel()) { - pattern = outl_pattern; - alternates[0] = outl_pattern[7]; - alternates[1] = outl_pattern[7]; - patch = &vmcall_pattern[5]; - } else { - pattern = vmcall_pattern; - alternates[0] = vmcall_pattern[7]; - alternates[1] = 0xd9; /* AMD's VMMCALL */ - patch = &outl_pattern[5]; - } - if (memcmp(rom + pos, pattern, 7) == 0 && - (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) { - cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3); - /* - * Don't flush the tb here. Under ordinary conditions, the patched - * calls are miles away from the current IP. Under malicious - * conditions, the guest could trick us to crash. - */ - } - } - - g_free(rom); - - if (patches != 0 && patches != 2) { - return -1; - } - - return 0; -} - -/* - * For TCG mode or the time KVM honors read-only memory regions, we need to - * enable write access to the option ROM so that variables can be updated by - * the guest. 
- */ -static int vapic_map_rom_writable(VAPICROMState *s) -{ - hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; - MemoryRegionSection section; - MemoryRegion *as; - size_t rom_size; - uint8_t *ram; - - as = sysbus_address_space(&s->busdev); - - if (s->rom_mapped_writable) { - memory_region_del_subregion(as, &s->rom); - object_unparent(OBJECT(&s->rom)); - } - - /* grab RAM memory region (region @rom_paddr may still be pc.rom) */ - section = memory_region_find(as, 0, 1); - - /* read ROM size from RAM region */ - if (rom_paddr + 2 >= memory_region_size(section.mr)) { - return -1; - } - ram = memory_region_get_ram_ptr(section.mr); - rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE; - if (rom_size == 0) { - return -1; - } - s->rom_size = rom_size; - - /* We need to round to avoid creating subpages - * from which we cannot run code. */ - rom_size += rom_paddr & ~TARGET_PAGE_MASK; - rom_paddr &= TARGET_PAGE_MASK; - rom_size = TARGET_PAGE_ALIGN(rom_size); - - memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr, - rom_paddr, rom_size); - memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000); - s->rom_mapped_writable = true; - memory_region_unref(section.mr); - - return 0; -} - -static int vapic_prepare(VAPICROMState *s) -{ - if (vapic_map_rom_writable(s) < 0) { - return -1; - } - - if (patch_hypercalls(s) < 0) { - return -1; - } - - vapic_enable_tpr_reporting(true); - - return 0; -} - -static void vapic_write(void *opaque, hwaddr addr, uint64_t data, - unsigned int size) -{ - VAPICROMState *s = opaque; - X86CPU *cpu; - CPUX86State *env; - hwaddr rom_paddr; - - if (!current_cpu) { - return; - } - - cpu_synchronize_state(current_cpu); - cpu = X86_CPU(current_cpu); - env = &cpu->env; - - /* - * The VAPIC supports two PIO-based hypercalls, both via port 0x7E. - * o 16-bit write access: - * Reports the option ROM initialization to the hypervisor. Written - * value is the offset of the state structure in the ROM. 
- * o 8-bit write access: - * Reactivates the VAPIC after a guest hibernation, i.e. after the - * option ROM content has been re-initialized by a guest power cycle. - * o 32-bit write access: - * Poll for pending IRQs, considering the current VAPIC state. - */ - switch (size) { - case 2: - if (s->state == VAPIC_INACTIVE) { - rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK; - s->rom_state_paddr = rom_paddr + data; - - s->state = VAPIC_STANDBY; - } - if (vapic_prepare(s) < 0) { - s->state = VAPIC_INACTIVE; - s->rom_state_paddr = 0; - break; - } - break; - case 1: - if (kvm_enabled()) { - /* - * Disable triggering instruction in ROM by writing a NOP. - * - * We cannot do this in TCG mode as the reported IP is not - * accurate. - */ - pause_all_vcpus(); - patch_byte(cpu, env->eip - 2, 0x66); - patch_byte(cpu, env->eip - 1, 0x90); - resume_all_vcpus(); - } - - if (s->state == VAPIC_ACTIVE) { - break; - } - if (update_rom_mapping(s, env, env->eip) < 0) { - break; - } - if (find_real_tpr_addr(s, env) < 0) { - break; - } - vapic_enable(s, cpu); - break; - default: - case 4: - if (!kvm_irqchip_in_kernel()) { - apic_poll_irq(cpu->apic_state); - } - break; - } -} - -static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size) -{ - return 0xffffffff; -} - -static const MemoryRegionOps vapic_ops = { - .write = vapic_write, - .read = vapic_read, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void vapic_realize(DeviceState *dev, Error **errp) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - VAPICROMState *s = VAPIC(dev); - - memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2); - sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io); - sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2); - - option_rom[nb_option_roms].name = "kvmvapic.bin"; - option_rom[nb_option_roms].bootindex = -1; - nb_option_roms++; -} - -static void do_vapic_enable(void *data) -{ - VAPICROMState *s = data; - X86CPU *cpu = X86_CPU(first_cpu); - - static const uint8_t enabled = 1; - 
cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled), - &enabled, sizeof(enabled)); - apic_enable_vapic(cpu->apic_state, s->vapic_paddr); - s->state = VAPIC_ACTIVE; -} - -static void kvmvapic_vm_state_change(void *opaque, int running, - RunState state) -{ - VAPICROMState *s = opaque; - uint8_t *zero; - - if (!running) { - return; - } - - if (s->state == VAPIC_ACTIVE) { - if (smp_cpus == 1) { - run_on_cpu(first_cpu, do_vapic_enable, s); - } else { - zero = g_malloc0(s->rom_state.vapic_size); - cpu_physical_memory_write(s->vapic_paddr, zero, - s->rom_state.vapic_size); - g_free(zero); - } - } - - qemu_del_vm_change_state_handler(s->vmsentry); -} - -static int vapic_post_load(void *opaque, int version_id) -{ - VAPICROMState *s = opaque; - - /* - * The old implementation of qemu-kvm did not provide the state - * VAPIC_STANDBY. Reconstruct it. - */ - if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) { - s->state = VAPIC_STANDBY; - } - - if (s->state != VAPIC_INACTIVE) { - if (vapic_prepare(s) < 0) { - return -1; - } - } - - if (!s->vmsentry) { - s->vmsentry = - qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s); - } - return 0; -} - -static const VMStateDescription vmstate_handlers = { - .name = "kvmvapic-handlers", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(set_tpr, VAPICHandlers), - VMSTATE_UINT32(set_tpr_eax, VAPICHandlers), - VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8), - VMSTATE_UINT32(get_tpr_stack, VAPICHandlers), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription vmstate_guest_rom = { - .name = "kvmvapic-guest-rom", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UNUSED(8), /* signature */ - VMSTATE_UINT32(vaddr, GuestROMState), - VMSTATE_UINT32(fixup_start, GuestROMState), - VMSTATE_UINT32(fixup_end, GuestROMState), - VMSTATE_UINT32(vapic_vaddr, GuestROMState), - VMSTATE_UINT32(vapic_size, GuestROMState), - 
VMSTATE_UINT32(vcpu_shift, GuestROMState), - VMSTATE_UINT32(real_tpr_addr, GuestROMState), - VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers), - VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription vmstate_vapic = { - .name = "kvm-tpr-opt", /* compatible with qemu-kvm VAPIC */ - .version_id = 1, - .minimum_version_id = 1, - .post_load = vapic_post_load, - .fields = (VMStateField[]) { - VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom, - GuestROMState), - VMSTATE_UINT32(state, VAPICROMState), - VMSTATE_UINT32(real_tpr_addr, VAPICROMState), - VMSTATE_UINT32(rom_state_vaddr, VAPICROMState), - VMSTATE_UINT32(vapic_paddr, VAPICROMState), - VMSTATE_UINT32(rom_state_paddr, VAPICROMState), - VMSTATE_END_OF_LIST() - } -}; - -static void vapic_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = vapic_reset; - dc->vmsd = &vmstate_vapic; - dc->realize = vapic_realize; -} - -static const TypeInfo vapic_type = { - .name = TYPE_VAPIC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(VAPICROMState), - .class_init = vapic_class_init, -}; - -static void vapic_register(void) -{ - type_register_static(&vapic_type); -} - -type_init(vapic_register); diff --git a/qemu/hw/i386/multiboot.c b/qemu/hw/i386/multiboot.c deleted file mode 100644 index 387caa67d..000000000 --- a/qemu/hw/i386/multiboot.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * QEMU PC System Emulator - * - * Copyright (c) 2003-2004 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished 
to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "cpu.h" -#include "hw/hw.h" -#include "hw/nvram/fw_cfg.h" -#include "multiboot.h" -#include "hw/loader.h" -#include "elf.h" -#include "sysemu/sysemu.h" - -/* Show multiboot debug output */ -//#define DEBUG_MULTIBOOT - -#ifdef DEBUG_MULTIBOOT -#define mb_debug(a...) fprintf(stderr, ## a) -#else -#define mb_debug(a...) 
-#endif - -#define MULTIBOOT_STRUCT_ADDR 0x9000 - -#if MULTIBOOT_STRUCT_ADDR > 0xf0000 -#error multiboot struct needs to fit in 16 bit real mode -#endif - -enum { - /* Multiboot info */ - MBI_FLAGS = 0, - MBI_MEM_LOWER = 4, - MBI_MEM_UPPER = 8, - MBI_BOOT_DEVICE = 12, - MBI_CMDLINE = 16, - MBI_MODS_COUNT = 20, - MBI_MODS_ADDR = 24, - MBI_MMAP_ADDR = 48, - MBI_BOOTLOADER = 64, - - MBI_SIZE = 88, - - /* Multiboot modules */ - MB_MOD_START = 0, - MB_MOD_END = 4, - MB_MOD_CMDLINE = 8, - - MB_MOD_SIZE = 16, - - /* Region offsets */ - ADDR_E820_MAP = MULTIBOOT_STRUCT_ADDR + 0, - ADDR_MBI = ADDR_E820_MAP + 0x500, - - /* Multiboot flags */ - MULTIBOOT_FLAGS_MEMORY = 1 << 0, - MULTIBOOT_FLAGS_BOOT_DEVICE = 1 << 1, - MULTIBOOT_FLAGS_CMDLINE = 1 << 2, - MULTIBOOT_FLAGS_MODULES = 1 << 3, - MULTIBOOT_FLAGS_MMAP = 1 << 6, - MULTIBOOT_FLAGS_BOOTLOADER = 1 << 9, -}; - -typedef struct { - /* buffer holding kernel, cmdlines and mb_infos */ - void *mb_buf; - /* address in target */ - hwaddr mb_buf_phys; - /* size of mb_buf in bytes */ - unsigned mb_buf_size; - /* offset of mb-info's in bytes */ - hwaddr offset_mbinfo; - /* offset in buffer for cmdlines in bytes */ - hwaddr offset_cmdlines; - /* offset in buffer for bootloader name in bytes */ - hwaddr offset_bootloader; - /* offset of modules in bytes */ - hwaddr offset_mods; - /* available slots for mb modules infos */ - int mb_mods_avail; - /* currently used slots of mb modules */ - int mb_mods_count; -} MultibootState; - -const char *bootloader_name = "qemu"; - -static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline) -{ - hwaddr p = s->offset_cmdlines; - char *b = (char *)s->mb_buf + p; - - get_opt_value(b, strlen(cmdline) + 1, cmdline); - s->offset_cmdlines += strlen(b) + 1; - return s->mb_buf_phys + p; -} - -static uint32_t mb_add_bootloader(MultibootState *s, const char *bootloader) -{ - hwaddr p = s->offset_bootloader; - char *b = (char *)s->mb_buf + p; - - memcpy(b, bootloader, strlen(bootloader) + 1); - 
s->offset_bootloader += strlen(b) + 1; - return s->mb_buf_phys + p; -} - -static void mb_add_mod(MultibootState *s, - hwaddr start, hwaddr end, - hwaddr cmdline_phys) -{ - char *p; - assert(s->mb_mods_count < s->mb_mods_avail); - - p = (char *)s->mb_buf + s->offset_mbinfo + MB_MOD_SIZE * s->mb_mods_count; - - stl_p(p + MB_MOD_START, start); - stl_p(p + MB_MOD_END, end); - stl_p(p + MB_MOD_CMDLINE, cmdline_phys); - - mb_debug("mod%02d: "TARGET_FMT_plx" - "TARGET_FMT_plx"\n", - s->mb_mods_count, start, end); - - s->mb_mods_count++; -} - -int load_multiboot(FWCfgState *fw_cfg, - FILE *f, - const char *kernel_filename, - const char *initrd_filename, - const char *kernel_cmdline, - int kernel_file_size, - uint8_t *header) -{ - int i, is_multiboot = 0; - uint32_t flags = 0; - uint32_t mh_entry_addr; - uint32_t mh_load_addr; - uint32_t mb_kernel_size; - MultibootState mbs; - uint8_t bootinfo[MBI_SIZE]; - uint8_t *mb_bootinfo_data; - uint32_t cmdline_len; - - /* Ok, let's see if it is a multiboot image. - The header is 12x32bit long, so the latest entry may be 8192 - 48. */ - for (i = 0; i < (8192 - 48); i += 4) { - if (ldl_p(header+i) == 0x1BADB002) { - uint32_t checksum = ldl_p(header+i+8); - flags = ldl_p(header+i+4); - checksum += flags; - checksum += (uint32_t)0x1BADB002; - if (!checksum) { - is_multiboot = 1; - break; - } - } - } - - if (!is_multiboot) - return 0; /* no multiboot */ - - mb_debug("qemu: I believe we found a multiboot image!\n"); - memset(bootinfo, 0, sizeof(bootinfo)); - memset(&mbs, 0, sizeof(mbs)); - - if (flags & 0x00000004) { /* MULTIBOOT_HEADER_HAS_VBE */ - fprintf(stderr, "qemu: multiboot knows VBE. 
we don't.\n"); - } - if (!(flags & 0x00010000)) { /* MULTIBOOT_HEADER_HAS_ADDR */ - uint64_t elf_entry; - uint64_t elf_low, elf_high; - int kernel_size; - fclose(f); - - if (((struct elf64_hdr*)header)->e_machine == EM_X86_64) { - fprintf(stderr, "Cannot load x86-64 image, give a 32bit one.\n"); - exit(1); - } - - kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry, - &elf_low, &elf_high, 0, I386_ELF_MACHINE, - 0, 0); - if (kernel_size < 0) { - fprintf(stderr, "Error while loading elf kernel\n"); - exit(1); - } - mh_load_addr = elf_low; - mb_kernel_size = elf_high - elf_low; - mh_entry_addr = elf_entry; - - mbs.mb_buf = g_malloc(mb_kernel_size); - if (rom_copy(mbs.mb_buf, mh_load_addr, mb_kernel_size) != mb_kernel_size) { - fprintf(stderr, "Error while fetching elf kernel from rom\n"); - exit(1); - } - - mb_debug("qemu: loading multiboot-elf kernel (%#x bytes) with entry %#zx\n", - mb_kernel_size, (size_t)mh_entry_addr); - } else { - /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */ - uint32_t mh_header_addr = ldl_p(header+i+12); - uint32_t mh_load_end_addr = ldl_p(header+i+20); - uint32_t mh_bss_end_addr = ldl_p(header+i+24); - mh_load_addr = ldl_p(header+i+16); - uint32_t mb_kernel_text_offset = i - (mh_header_addr - mh_load_addr); - uint32_t mb_load_size = 0; - mh_entry_addr = ldl_p(header+i+28); - - if (mh_load_end_addr) { - mb_kernel_size = mh_bss_end_addr - mh_load_addr; - mb_load_size = mh_load_end_addr - mh_load_addr; - } else { - mb_kernel_size = kernel_file_size - mb_kernel_text_offset; - mb_load_size = mb_kernel_size; - } - - /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE. 
- uint32_t mh_mode_type = ldl_p(header+i+32); - uint32_t mh_width = ldl_p(header+i+36); - uint32_t mh_height = ldl_p(header+i+40); - uint32_t mh_depth = ldl_p(header+i+44); */ - - mb_debug("multiboot: mh_header_addr = %#x\n", mh_header_addr); - mb_debug("multiboot: mh_load_addr = %#x\n", mh_load_addr); - mb_debug("multiboot: mh_load_end_addr = %#x\n", mh_load_end_addr); - mb_debug("multiboot: mh_bss_end_addr = %#x\n", mh_bss_end_addr); - mb_debug("qemu: loading multiboot kernel (%#x bytes) at %#x\n", - mb_load_size, mh_load_addr); - - mbs.mb_buf = g_malloc(mb_kernel_size); - fseek(f, mb_kernel_text_offset, SEEK_SET); - if (fread(mbs.mb_buf, 1, mb_load_size, f) != mb_load_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - memset(mbs.mb_buf + mb_load_size, 0, mb_kernel_size - mb_load_size); - fclose(f); - } - - mbs.mb_buf_phys = mh_load_addr; - - mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_kernel_size); - mbs.offset_mbinfo = mbs.mb_buf_size; - - /* Calculate space for cmdlines, bootloader name, and mb_mods */ - cmdline_len = strlen(kernel_filename) + 1; - cmdline_len += strlen(kernel_cmdline) + 1; - if (initrd_filename) { - const char *r = initrd_filename; - cmdline_len += strlen(r) + 1; - mbs.mb_mods_avail = 1; - while (*(r = get_opt_value(NULL, 0, r))) { - mbs.mb_mods_avail++; - r++; - } - } - - mbs.mb_buf_size += cmdline_len; - mbs.mb_buf_size += MB_MOD_SIZE * mbs.mb_mods_avail; - mbs.mb_buf_size += strlen(bootloader_name) + 1; - - mbs.mb_buf_size = TARGET_PAGE_ALIGN(mbs.mb_buf_size); - - /* enlarge mb_buf to hold cmdlines, bootloader, mb-info structs */ - mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); - mbs.offset_cmdlines = mbs.offset_mbinfo + mbs.mb_mods_avail * MB_MOD_SIZE; - mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len; - - if (initrd_filename) { - char *next_initrd, not_last; - - mbs.offset_mods = mbs.mb_buf_size; - - do { - char *next_space; - int mb_mod_length; - uint32_t offs = mbs.mb_buf_size; - - next_initrd = (char 
*)get_opt_value(NULL, 0, initrd_filename); - not_last = *next_initrd; - *next_initrd = '\0'; - /* if a space comes after the module filename, treat everything - after that as parameters */ - hwaddr c = mb_add_cmdline(&mbs, initrd_filename); - if ((next_space = strchr(initrd_filename, ' '))) - *next_space = '\0'; - mb_debug("multiboot loading module: %s\n", initrd_filename); - mb_mod_length = get_image_size(initrd_filename); - if (mb_mod_length < 0) { - fprintf(stderr, "Failed to open file '%s'\n", initrd_filename); - exit(1); - } - - mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size); - mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); - - load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs); - mb_add_mod(&mbs, mbs.mb_buf_phys + offs, - mbs.mb_buf_phys + offs + mb_mod_length, c); - - mb_debug("mod_start: %p\nmod_end: %p\n cmdline: "TARGET_FMT_plx"\n", - (char *)mbs.mb_buf + offs, - (char *)mbs.mb_buf + offs + mb_mod_length, c); - initrd_filename = next_initrd+1; - } while (not_last); - } - - /* Commandline support */ - char kcmdline[strlen(kernel_filename) + strlen(kernel_cmdline) + 2]; - snprintf(kcmdline, sizeof(kcmdline), "%s %s", - kernel_filename, kernel_cmdline); - stl_p(bootinfo + MBI_CMDLINE, mb_add_cmdline(&mbs, kcmdline)); - - stl_p(bootinfo + MBI_BOOTLOADER, mb_add_bootloader(&mbs, bootloader_name)); - - stl_p(bootinfo + MBI_MODS_ADDR, mbs.mb_buf_phys + mbs.offset_mbinfo); - stl_p(bootinfo + MBI_MODS_COUNT, mbs.mb_mods_count); /* mods_count */ - - /* the kernel is where we want it to be now */ - stl_p(bootinfo + MBI_FLAGS, MULTIBOOT_FLAGS_MEMORY - | MULTIBOOT_FLAGS_BOOT_DEVICE - | MULTIBOOT_FLAGS_CMDLINE - | MULTIBOOT_FLAGS_MODULES - | MULTIBOOT_FLAGS_MMAP - | MULTIBOOT_FLAGS_BOOTLOADER); - stl_p(bootinfo + MBI_BOOT_DEVICE, 0x8000ffff); /* XXX: use the -boot switch? 
*/ - stl_p(bootinfo + MBI_MMAP_ADDR, ADDR_E820_MAP); - - mb_debug("multiboot: mh_entry_addr = %#x\n", mh_entry_addr); - mb_debug(" mb_buf_phys = "TARGET_FMT_plx"\n", mbs.mb_buf_phys); - mb_debug(" mod_start = "TARGET_FMT_plx"\n", mbs.mb_buf_phys + mbs.offset_mods); - mb_debug(" mb_mods_count = %d\n", mbs.mb_mods_count); - - /* save bootinfo off the stack */ - mb_bootinfo_data = g_malloc(sizeof(bootinfo)); - memcpy(mb_bootinfo_data, bootinfo, sizeof(bootinfo)); - - /* Pass variables to option rom */ - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, mh_entry_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, mbs.mb_buf_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, - mbs.mb_buf, mbs.mb_buf_size); - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, ADDR_MBI); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, sizeof(bootinfo)); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, mb_bootinfo_data, - sizeof(bootinfo)); - - option_rom[nb_option_roms].name = "multiboot.bin"; - option_rom[nb_option_roms].bootindex = 0; - nb_option_roms++; - - return 1; /* yes, we are multiboot */ -} diff --git a/qemu/hw/i386/multiboot.h b/qemu/hw/i386/multiboot.h deleted file mode 100644 index 60de309cd..000000000 --- a/qemu/hw/i386/multiboot.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef QEMU_MULTIBOOT_H -#define QEMU_MULTIBOOT_H - -#include "hw/nvram/fw_cfg.h" - -int load_multiboot(FWCfgState *fw_cfg, - FILE *f, - const char *kernel_filename, - const char *initrd_filename, - const char *kernel_cmdline, - int kernel_file_size, - uint8_t *header); - -#endif diff --git a/qemu/hw/i386/pc.c b/qemu/hw/i386/pc.c deleted file mode 100644 index 99437e0b7..000000000 --- a/qemu/hw/i386/pc.c +++ /dev/null @@ -1,2017 +0,0 @@ -/* - * QEMU PC System Emulator - * - * Copyright (c) 2003-2004 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the 
"Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "hw/char/serial.h" -#include "hw/i386/apic.h" -#include "hw/i386/topology.h" -#include "sysemu/cpus.h" -#include "hw/block/fdc.h" -#include "hw/ide.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" -#include "hw/nvram/fw_cfg.h" -#include "hw/timer/hpet.h" -#include "hw/smbios/smbios.h" -#include "hw/loader.h" -#include "elf.h" -#include "multiboot.h" -#include "hw/timer/mc146818rtc.h" -#include "hw/timer/i8254.h" -#include "hw/audio/pcspk.h" -#include "hw/pci/msi.h" -#include "hw/sysbus.h" -#include "sysemu/sysemu.h" -#include "sysemu/numa.h" -#include "sysemu/kvm.h" -#include "sysemu/qtest.h" -#include "kvm_i386.h" -#include "hw/xen/xen.h" -#include "sysemu/block-backend.h" -#include "hw/block/block.h" -#include "ui/qemu-spice.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" -#include "sysemu/arch_init.h" -#include "qemu/bitmap.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" -#include "hw/acpi/acpi.h" -#include 
"hw/acpi/cpu_hotplug.h" -#include "hw/boards.h" -#include "hw/pci/pci_host.h" -#include "acpi-build.h" -#include "hw/mem/pc-dimm.h" -#include "qapi/visitor.h" -#include "qapi-visit.h" -#include "qom/cpu.h" - -/* debug PC/ISA interrupts */ -//#define DEBUG_IRQ - -#ifdef DEBUG_IRQ -#define DPRINTF(fmt, ...) \ - do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) -#endif - -#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) -#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) -#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) -#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) -#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) - -#define E820_NR_ENTRIES 16 - -struct e820_entry { - uint64_t address; - uint64_t length; - uint32_t type; -} QEMU_PACKED __attribute((__aligned__(4))); - -struct e820_table { - uint32_t count; - struct e820_entry entry[E820_NR_ENTRIES]; -} QEMU_PACKED __attribute((__aligned__(4))); - -static struct e820_table e820_reserve; -static struct e820_entry *e820_table; -static unsigned e820_entries; -struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; - -void gsi_handler(void *opaque, int n, int level) -{ - GSIState *s = opaque; - - DPRINTF("pc: %s GSI %d\n", level ? 
"raising" : "lowering", n); - if (n < ISA_NUM_IRQS) { - qemu_set_irq(s->i8259_irq[n], level); - } - qemu_set_irq(s->ioapic_irq[n], level); -} - -static void ioport80_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ -} - -static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size) -{ - return 0xffffffffffffffffULL; -} - -/* MSDOS compatibility mode FPU exception support */ -static qemu_irq ferr_irq; - -void pc_register_ferr_irq(qemu_irq irq) -{ - ferr_irq = irq; -} - -/* XXX: add IGNNE support */ -void cpu_set_ferr(CPUX86State *s) -{ - qemu_irq_raise(ferr_irq); -} - -static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - qemu_irq_lower(ferr_irq); -} - -static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size) -{ - return 0xffffffffffffffffULL; -} - -/* TSC handling */ -uint64_t cpu_get_tsc(CPUX86State *env) -{ - return cpu_get_ticks(); -} - -/* IRQ handling */ -int cpu_get_pic_interrupt(CPUX86State *env) -{ - X86CPU *cpu = x86_env_get_cpu(env); - int intno; - - intno = apic_get_interrupt(cpu->apic_state); - if (intno >= 0) { - return intno; - } - /* read the irq from the PIC */ - if (!apic_accept_pic_intr(cpu->apic_state)) { - return -1; - } - - intno = pic_read_irq(isa_pic); - return intno; -} - -static void pic_irq_request(void *opaque, int irq, int level) -{ - CPUState *cs = first_cpu; - X86CPU *cpu = X86_CPU(cs); - - DPRINTF("pic_irqs: %s irq %d\n", level? 
"raise" : "lower", irq); - if (cpu->apic_state) { - CPU_FOREACH(cs) { - cpu = X86_CPU(cs); - if (apic_accept_pic_intr(cpu->apic_state)) { - apic_deliver_pic_intr(cpu->apic_state, level); - } - } - } else { - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } - } -} - -/* PC cmos mappings */ - -#define REG_EQUIPMENT_BYTE 0x14 - -int cmos_get_fd_drive_type(FloppyDriveType fd0) -{ - int val; - - switch (fd0) { - case FLOPPY_DRIVE_TYPE_144: - /* 1.44 Mb 3"5 drive */ - val = 4; - break; - case FLOPPY_DRIVE_TYPE_288: - /* 2.88 Mb 3"5 drive */ - val = 5; - break; - case FLOPPY_DRIVE_TYPE_120: - /* 1.2 Mb 5"5 drive */ - val = 2; - break; - case FLOPPY_DRIVE_TYPE_NONE: - default: - val = 0; - break; - } - return val; -} - -static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs, - int16_t cylinders, int8_t heads, int8_t sectors) -{ - rtc_set_memory(s, type_ofs, 47); - rtc_set_memory(s, info_ofs, cylinders); - rtc_set_memory(s, info_ofs + 1, cylinders >> 8); - rtc_set_memory(s, info_ofs + 2, heads); - rtc_set_memory(s, info_ofs + 3, 0xff); - rtc_set_memory(s, info_ofs + 4, 0xff); - rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3)); - rtc_set_memory(s, info_ofs + 6, cylinders); - rtc_set_memory(s, info_ofs + 7, cylinders >> 8); - rtc_set_memory(s, info_ofs + 8, sectors); -} - -/* convert boot_device letter to something recognizable by the bios */ -static int boot_device2nibble(char boot_device) -{ - switch(boot_device) { - case 'a': - case 'b': - return 0x01; /* floppy boot */ - case 'c': - return 0x02; /* hard drive boot */ - case 'd': - return 0x03; /* CD-ROM boot */ - case 'n': - return 0x04; /* Network boot */ - } - return 0; -} - -static void set_boot_dev(ISADevice *s, const char *boot_device, Error **errp) -{ -#define PC_MAX_BOOT_DEVICES 3 - int nbds, bds[3] = { 0, }; - int i; - - nbds = strlen(boot_device); - if (nbds > PC_MAX_BOOT_DEVICES) { - error_setg(errp, "Too many boot devices 
for PC"); - return; - } - for (i = 0; i < nbds; i++) { - bds[i] = boot_device2nibble(boot_device[i]); - if (bds[i] == 0) { - error_setg(errp, "Invalid boot device for PC: '%c'", - boot_device[i]); - return; - } - } - rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]); - rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1)); -} - -static void pc_boot_set(void *opaque, const char *boot_device, Error **errp) -{ - set_boot_dev(opaque, boot_device, errp); -} - -static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy) -{ - int val, nb, i; - FloppyDriveType fd_type[2] = { FLOPPY_DRIVE_TYPE_NONE, - FLOPPY_DRIVE_TYPE_NONE }; - - /* floppy type */ - if (floppy) { - for (i = 0; i < 2; i++) { - fd_type[i] = isa_fdc_get_drive_type(floppy, i); - } - } - val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | - cmos_get_fd_drive_type(fd_type[1]); - rtc_set_memory(rtc_state, 0x10, val); - - val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE); - nb = 0; - if (fd_type[0] != FLOPPY_DRIVE_TYPE_NONE) { - nb++; - } - if (fd_type[1] != FLOPPY_DRIVE_TYPE_NONE) { - nb++; - } - switch (nb) { - case 0: - break; - case 1: - val |= 0x01; /* 1 drive, ready for boot */ - break; - case 2: - val |= 0x41; /* 2 drives, ready for boot */ - break; - } - rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val); -} - -typedef struct pc_cmos_init_late_arg { - ISADevice *rtc_state; - BusState *idebus[2]; -} pc_cmos_init_late_arg; - -typedef struct check_fdc_state { - ISADevice *floppy; - bool multiple; -} CheckFdcState; - -static int check_fdc(Object *obj, void *opaque) -{ - CheckFdcState *state = opaque; - Object *fdc; - uint32_t iobase; - Error *local_err = NULL; - - fdc = object_dynamic_cast(obj, TYPE_ISA_FDC); - if (!fdc) { - return 0; - } - - iobase = object_property_get_int(obj, "iobase", &local_err); - if (local_err || iobase != 0x3f0) { - error_free(local_err); - return 0; - } - - if (state->floppy) { - state->multiple = true; - } else { - state->floppy = ISA_DEVICE(obj); - } - return 
0; -} - -static const char * const fdc_container_path[] = { - "/unattached", "/peripheral", "/peripheral-anon" -}; - -/* - * Locate the FDC at IO address 0x3f0, in order to configure the CMOS registers - * and ACPI objects. - */ -ISADevice *pc_find_fdc0(void) -{ - int i; - Object *container; - CheckFdcState state = { 0 }; - - for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) { - container = container_get(qdev_get_machine(), fdc_container_path[i]); - object_child_foreach(container, check_fdc, &state); - } - - if (state.multiple) { - error_report("warning: multiple floppy disk controllers with " - "iobase=0x3f0 have been found"); - error_printf("the one being picked for CMOS setup might not reflect " - "your intent"); - } - - return state.floppy; -} - -static void pc_cmos_init_late(void *opaque) -{ - pc_cmos_init_late_arg *arg = opaque; - ISADevice *s = arg->rtc_state; - int16_t cylinders; - int8_t heads, sectors; - int val; - int i, trans; - - val = 0; - if (ide_get_geometry(arg->idebus[0], 0, - &cylinders, &heads, §ors) >= 0) { - cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors); - val |= 0xf0; - } - if (ide_get_geometry(arg->idebus[0], 1, - &cylinders, &heads, §ors) >= 0) { - cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors); - val |= 0x0f; - } - rtc_set_memory(s, 0x12, val); - - val = 0; - for (i = 0; i < 4; i++) { - /* NOTE: ide_get_geometry() returns the physical - geometry. It is always such that: 1 <= sects <= 63, 1 - <= heads <= 16, 1 <= cylinders <= 16383. The BIOS - geometry can be different if a translation is done. 
*/ - if (ide_get_geometry(arg->idebus[i / 2], i % 2, - &cylinders, &heads, §ors) >= 0) { - trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1; - assert((trans & ~3) == 0); - val |= trans << (i * 2); - } - } - rtc_set_memory(s, 0x39, val); - - pc_cmos_init_floppy(s, pc_find_fdc0()); - - qemu_unregister_reset(pc_cmos_init_late, opaque); -} - -void pc_cmos_init(PCMachineState *pcms, - BusState *idebus0, BusState *idebus1, - ISADevice *s) -{ - int val; - static pc_cmos_init_late_arg arg; - - /* various important CMOS locations needed by PC/Bochs bios */ - - /* memory size */ - /* base memory (first MiB) */ - val = MIN(pcms->below_4g_mem_size / 1024, 640); - rtc_set_memory(s, 0x15, val); - rtc_set_memory(s, 0x16, val >> 8); - /* extended memory (next 64MiB) */ - if (pcms->below_4g_mem_size > 1024 * 1024) { - val = (pcms->below_4g_mem_size - 1024 * 1024) / 1024; - } else { - val = 0; - } - if (val > 65535) - val = 65535; - rtc_set_memory(s, 0x17, val); - rtc_set_memory(s, 0x18, val >> 8); - rtc_set_memory(s, 0x30, val); - rtc_set_memory(s, 0x31, val >> 8); - /* memory between 16MiB and 4GiB */ - if (pcms->below_4g_mem_size > 16 * 1024 * 1024) { - val = (pcms->below_4g_mem_size - 16 * 1024 * 1024) / 65536; - } else { - val = 0; - } - if (val > 65535) - val = 65535; - rtc_set_memory(s, 0x34, val); - rtc_set_memory(s, 0x35, val >> 8); - /* memory above 4GiB */ - val = pcms->above_4g_mem_size / 65536; - rtc_set_memory(s, 0x5b, val); - rtc_set_memory(s, 0x5c, val >> 8); - rtc_set_memory(s, 0x5d, val >> 16); - - /* set the number of CPU */ - rtc_set_memory(s, 0x5f, smp_cpus - 1); - - object_property_add_link(OBJECT(pcms), "rtc_state", - TYPE_ISA_DEVICE, - (Object **)&pcms->rtc, - object_property_allow_set_link, - OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); - object_property_set_link(OBJECT(pcms), OBJECT(s), - "rtc_state", &error_abort); - - set_boot_dev(s, MACHINE(pcms)->boot_order, &error_fatal); - - val = 0; - val |= 0x02; /* FPU is there */ - val |= 0x04; /* 
PS/2 mouse installed */ - rtc_set_memory(s, REG_EQUIPMENT_BYTE, val); - - /* hard drives and FDC */ - arg.rtc_state = s; - arg.idebus[0] = idebus0; - arg.idebus[1] = idebus1; - qemu_register_reset(pc_cmos_init_late, &arg); -} - -#define TYPE_PORT92 "port92" -#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92) - -/* port 92 stuff: could be split off */ -typedef struct Port92State { - ISADevice parent_obj; - - MemoryRegion io; - uint8_t outport; - qemu_irq *a20_out; -} Port92State; - -static void port92_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) -{ - Port92State *s = opaque; - int oldval = s->outport; - - DPRINTF("port92: write 0x%02" PRIx64 "\n", val); - s->outport = val; - qemu_set_irq(*s->a20_out, (val >> 1) & 1); - if ((val & 1) && !(oldval & 1)) { - qemu_system_reset_request(); - } -} - -static uint64_t port92_read(void *opaque, hwaddr addr, - unsigned size) -{ - Port92State *s = opaque; - uint32_t ret; - - ret = s->outport; - DPRINTF("port92: read 0x%02x\n", ret); - return ret; -} - -static void port92_init(ISADevice *dev, qemu_irq *a20_out) -{ - Port92State *s = PORT92(dev); - - s->a20_out = a20_out; -} - -static const VMStateDescription vmstate_port92_isa = { - .name = "port92", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT8(outport, Port92State), - VMSTATE_END_OF_LIST() - } -}; - -static void port92_reset(DeviceState *d) -{ - Port92State *s = PORT92(d); - - s->outport &= ~1; -} - -static const MemoryRegionOps port92_ops = { - .read = port92_read, - .write = port92_write, - .impl = { - .min_access_size = 1, - .max_access_size = 1, - }, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void port92_initfn(Object *obj) -{ - Port92State *s = PORT92(obj); - - memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1); - - s->outport = 0; -} - -static void port92_realizefn(DeviceState *dev, Error **errp) -{ - ISADevice *isadev = ISA_DEVICE(dev); - Port92State *s = 
PORT92(dev); - - isa_register_ioport(isadev, &s->io, 0x92); -} - -static void port92_class_initfn(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = port92_realizefn; - dc->reset = port92_reset; - dc->vmsd = &vmstate_port92_isa; - /* - * Reason: unlike ordinary ISA devices, this one needs additional - * wiring: its A20 output line needs to be wired up by - * port92_init(). - */ - dc->cannot_instantiate_with_device_add_yet = true; -} - -static const TypeInfo port92_info = { - .name = TYPE_PORT92, - .parent = TYPE_ISA_DEVICE, - .instance_size = sizeof(Port92State), - .instance_init = port92_initfn, - .class_init = port92_class_initfn, -}; - -static void port92_register_types(void) -{ - type_register_static(&port92_info); -} - -type_init(port92_register_types) - -static void handle_a20_line_change(void *opaque, int irq, int level) -{ - X86CPU *cpu = opaque; - - /* XXX: send to all CPUs ? */ - /* XXX: add logic to handle multiple A20 line sources */ - x86_cpu_set_a20(cpu, level); -} - -int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) -{ - int index = le32_to_cpu(e820_reserve.count); - struct e820_entry *entry; - - if (type != E820_RAM) { - /* old FW_CFG_E820_TABLE entry -- reservations only */ - if (index >= E820_NR_ENTRIES) { - return -EBUSY; - } - entry = &e820_reserve.entry[index++]; - - entry->address = cpu_to_le64(address); - entry->length = cpu_to_le64(length); - entry->type = cpu_to_le32(type); - - e820_reserve.count = cpu_to_le32(index); - } - - /* new "etc/e820" file -- include ram too */ - e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); - e820_table[e820_entries].address = cpu_to_le64(address); - e820_table[e820_entries].length = cpu_to_le64(length); - e820_table[e820_entries].type = cpu_to_le32(type); - e820_entries++; - - return e820_entries; -} - -int e820_get_num_entries(void) -{ - return e820_entries; -} - -bool e820_get_entry(int idx, uint32_t type, uint64_t *address, 
uint64_t *length) -{ - if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) { - *address = le64_to_cpu(e820_table[idx].address); - *length = le64_to_cpu(e820_table[idx].length); - return true; - } - return false; -} - -/* Enables contiguous-apic-ID mode, for compatibility */ -static bool compat_apic_id_mode; - -void enable_compat_apic_id_mode(void) -{ - compat_apic_id_mode = true; -} - -/* Calculates initial APIC ID for a specific CPU index - * - * Currently we need to be able to calculate the APIC ID from the CPU index - * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have - * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of - * all CPUs up to max_cpus. - */ -static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) -{ - uint32_t correct_id; - static bool warned; - - correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); - if (compat_apic_id_mode) { - if (cpu_index != correct_id && !warned && !qtest_enabled()) { - error_report("APIC IDs set in compatibility mode, " - "CPU topology won't match the configuration"); - warned = true; - } - return cpu_index; - } else { - return correct_id; - } -} - -static void pc_build_smbios(FWCfgState *fw_cfg) -{ - uint8_t *smbios_tables, *smbios_anchor; - size_t smbios_tables_len, smbios_anchor_len; - struct smbios_phys_mem_area *mem_array; - unsigned i, array_count; - - smbios_tables = smbios_get_table_legacy(&smbios_tables_len); - if (smbios_tables) { - fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, - smbios_tables, smbios_tables_len); - } - - /* build the array of physical mem area from e820 table */ - mem_array = g_malloc0(sizeof(*mem_array) * e820_get_num_entries()); - for (i = 0, array_count = 0; i < e820_get_num_entries(); i++) { - uint64_t addr, len; - - if (e820_get_entry(i, E820_RAM, &addr, &len)) { - mem_array[array_count].address = addr; - mem_array[array_count].length = len; - array_count++; - } - } - 
smbios_get_tables(mem_array, array_count, - &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); - g_free(mem_array); - - if (smbios_anchor) { - fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables", - smbios_tables, smbios_tables_len); - fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor", - smbios_anchor, smbios_anchor_len); - } -} - -static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) -{ - FWCfgState *fw_cfg; - uint64_t *numa_fw_cfg; - int i, j; - - fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); - - /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: - * - * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug - * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC - * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the - * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS - * may see". - * - * So, this means we must not use max_cpus, here, but the maximum possible - * APIC ID value, plus one. - * - * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is - * the APIC ID, not the "CPU index" - */ - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); - fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, - acpi_tables, acpi_tables_len); - fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override()); - - pc_build_smbios(fw_cfg); - - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); - fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, - sizeof(struct e820_entry) * e820_entries); - - fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); - /* allocate memory for the NUMA channel: one (64bit) word for the number - * of nodes, one word for each VCPU->node and one word for each node to - * hold the amount of memory. 
- */ - numa_fw_cfg = g_new0(uint64_t, 1 + pcms->apic_id_limit + nb_numa_nodes); - numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); - assert(apic_id < pcms->apic_id_limit); - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); - break; - } - } - } - for (i = 0; i < nb_numa_nodes; i++) { - numa_fw_cfg[pcms->apic_id_limit + 1 + i] = - cpu_to_le64(numa_info[i].node_mem); - } - fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, - (1 + pcms->apic_id_limit + nb_numa_nodes) * - sizeof(*numa_fw_cfg)); - - return fw_cfg; -} - -static long get_file_size(FILE *f) -{ - long where, size; - - /* XXX: on Unix systems, using fstat() probably makes more sense */ - - where = ftell(f); - fseek(f, 0, SEEK_END); - size = ftell(f); - fseek(f, where, SEEK_SET); - - return size; -} - -static void load_linux(PCMachineState *pcms, - FWCfgState *fw_cfg) -{ - uint16_t protocol; - int setup_size, kernel_size, initrd_size = 0, cmdline_size; - uint32_t initrd_max; - uint8_t header[8192], *setup, *kernel, *initrd_data; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; - FILE *f; - char *vmode; - MachineState *machine = MACHINE(pcms); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - - /* Align to 16 bytes as a paranoia measure */ - cmdline_size = (strlen(kernel_cmdline)+16) & ~15; - - /* load the kernel header */ - f = fopen(kernel_filename, "rb"); - if (!f || !(kernel_size = get_file_size(f)) || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - /* kernel protocol version */ -#if 0 - 
fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); -#endif - if (ldl_p(header+0x202) == 0x53726448) { - protocol = lduw_p(header+0x206); - } else { - /* This looks like a multiboot kernel. If it is, let's stop - treating it like a Linux kernel. */ - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, - kernel_cmdline, kernel_size, header)) { - return; - } - protocol = 0; - } - - if (protocol < 0x200 || !(header[0x211] & 0x01)) { - /* Low kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x10000; - } else if (protocol < 0x202) { - /* High but ancient kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x100000; - } else { - /* High and recent kernel */ - real_addr = 0x10000; - cmdline_addr = 0x20000; - prot_addr = 0x100000; - } - -#if 0 - fprintf(stderr, - "qemu: real_addr = 0x" TARGET_FMT_plx "\n" - "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" - "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", - real_addr, - cmdline_addr, - prot_addr); -#endif - - /* highest address for loading the initrd */ - if (protocol >= 0x203) { - initrd_max = ldl_p(header+0x22c); - } else { - initrd_max = 0x37ffffff; - } - - if (initrd_max >= pcms->below_4g_mem_size - pcmc->acpi_data_size) { - initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - - if (protocol >= 0x202) { - stl_p(header+0x228, cmdline_addr); - } else { - stw_p(header+0x20, 0xA33F); - stw_p(header+0x22, cmdline_addr-real_addr); - } - - /* handle vga= parameter */ - vmode = strstr(kernel_cmdline, "vga="); - if (vmode) { - unsigned int video_mode; - /* skip "vga=" */ - vmode += 4; - if (!strncmp(vmode, "normal", 6)) { - video_mode = 0xffff; - } else if (!strncmp(vmode, "ext", 3)) { - video_mode = 0xfffe; - } else if 
(!strncmp(vmode, "ask", 3)) { - video_mode = 0xfffd; - } else { - video_mode = strtol(vmode, NULL, 0); - } - stw_p(header+0x1fa, video_mode); - } - - /* loader type */ - /* High nybble = B reserved for QEMU; low nybble is revision number. - If this code is substantially changed, you may want to consider - incrementing the revision. */ - if (protocol >= 0x200) { - header[0x210] = 0xB0; - } - /* heap */ - if (protocol >= 0x201) { - header[0x211] |= 0x80; /* CAN_USE_HEAP */ - stw_p(header+0x224, cmdline_addr-real_addr-0x200); - } - - /* load initrd */ - if (initrd_filename) { - if (protocol < 0x200) { - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); - exit(1); - } - - initrd_size = get_image_size(initrd_filename); - if (initrd_size < 0) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, strerror(errno)); - exit(1); - } - - initrd_addr = (initrd_max-initrd_size) & ~4095; - - initrd_data = g_malloc(initrd_size); - load_image(initrd_filename, initrd_data); - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); - - stl_p(header+0x218, initrd_addr); - stl_p(header+0x21c, initrd_size); - } - - /* load kernel and setup */ - setup_size = header[0x1f1]; - if (setup_size == 0) { - setup_size = 4; - } - setup_size = (setup_size+1)*512; - if (setup_size > kernel_size) { - fprintf(stderr, "qemu: invalid kernel header\n"); - exit(1); - } - kernel_size -= setup_size; - - setup = g_malloc(setup_size); - kernel = g_malloc(kernel_size); - fseek(f, 0, SEEK_SET); - if (fread(setup, 1, setup_size, f) != setup_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - if (fread(kernel, 1, kernel_size, f) != kernel_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - fclose(f); - memcpy(setup, header, MIN(sizeof(header), setup_size)); - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, 
prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); - - option_rom[nb_option_roms].name = "linuxboot.bin"; - option_rom[nb_option_roms].bootindex = 0; - nb_option_roms++; -} - -#define NE2000_NB_MAX 6 - -static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, - 0x280, 0x380 }; -static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 }; - -void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd) -{ - static int nb_ne2k = 0; - - if (nb_ne2k == NE2000_NB_MAX) - return; - isa_ne2000_init(bus, ne2000_io[nb_ne2k], - ne2000_irq[nb_ne2k], nd); - nb_ne2k++; -} - -DeviceState *cpu_get_current_apic(void) -{ - if (current_cpu) { - X86CPU *cpu = X86_CPU(current_cpu); - return cpu->apic_state; - } else { - return NULL; - } -} - -void pc_acpi_smi_interrupt(void *opaque, int irq, int level) -{ - X86CPU *cpu = opaque; - - if (level) { - cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); - } -} - -static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, - Error **errp) -{ - X86CPU *cpu = NULL; - Error *local_err = NULL; - - cpu = cpu_x86_create(cpu_model, &local_err); - if (local_err != NULL) { - goto out; - } - - object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err); - object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); - -out: - if (local_err) { - error_propagate(errp, local_err); - object_unref(OBJECT(cpu)); - cpu = NULL; - } - return cpu; -} - -void pc_hot_add_cpu(const int64_t id, Error **errp) -{ - X86CPU *cpu; - MachineState *machine = MACHINE(qdev_get_machine()); - int64_t apic_id = x86_cpu_apic_id_from_index(id); - Error *local_err = NULL; - - if (id < 0) { - error_setg(errp, "Invalid CPU id: %" PRIi64, id); - return; - } - - if (cpu_exists(apic_id)) { - 
error_setg(errp, "Unable to add CPU: %" PRIi64 - ", it already exists", id); - return; - } - - if (id >= max_cpus) { - error_setg(errp, "Unable to add CPU: %" PRIi64 - ", max allowed: %d", id, max_cpus - 1); - return; - } - - if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { - error_setg(errp, "Unable to add CPU: %" PRIi64 - ", resulting APIC ID (%" PRIi64 ") is too large", - id, apic_id); - return; - } - - cpu = pc_new_cpu(machine->cpu_model, apic_id, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - object_unref(OBJECT(cpu)); -} - -void pc_cpus_init(PCMachineState *pcms) -{ - int i; - X86CPU *cpu = NULL; - MachineState *machine = MACHINE(pcms); - - /* init CPUs */ - if (machine->cpu_model == NULL) { -#ifdef TARGET_X86_64 - machine->cpu_model = "qemu64"; -#else - machine->cpu_model = "qemu32"; -#endif - } - - /* Calculates the limit to CPU APIC ID values - * - * Limit for the APIC ID value, so that all - * CPU APIC IDs are < pcms->apic_id_limit. - * - * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). - */ - pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; - if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { - error_report("max_cpus is too large. APIC ID of last CPU is %u", - pcms->apic_id_limit - 1); - exit(1); - } - - pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + - sizeof(CPUArchId) * max_cpus); - for (i = 0; i < max_cpus; i++) { - pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); - pcms->possible_cpus->len++; - if (i < smp_cpus) { - cpu = pc_new_cpu(machine->cpu_model, x86_cpu_apic_id_from_index(i), - &error_fatal); - pcms->possible_cpus->cpus[i].cpu = CPU(cpu); - object_unref(OBJECT(cpu)); - } - } - - /* tell smbios about cpuid version and features */ - smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); -} - -/* pci-info ROM file. 
Little endian format */ -typedef struct PcRomPciInfo { - uint64_t w32_min; - uint64_t w32_max; - uint64_t w64_min; - uint64_t w64_max; -} PcRomPciInfo; - -static -void pc_machine_done(Notifier *notifier, void *data) -{ - PCMachineState *pcms = container_of(notifier, - PCMachineState, machine_done); - PCIBus *bus = pcms->bus; - - if (bus) { - int extra_hosts = 0; - - QLIST_FOREACH(bus, &bus->child, sibling) { - /* look for expander root buses */ - if (pci_bus_is_root(bus)) { - extra_hosts++; - } - } - if (extra_hosts && pcms->fw_cfg) { - uint64_t *val = g_malloc(sizeof(*val)); - *val = cpu_to_le64(extra_hosts); - fw_cfg_add_file(pcms->fw_cfg, - "etc/extra-pci-roots", val, sizeof(*val)); - } - } - - acpi_setup(); -} - -void pc_guest_info_init(PCMachineState *pcms) -{ - int i, j; - - pcms->apic_xrupt_override = kvm_allows_irq0_override(); - pcms->numa_nodes = nb_numa_nodes; - pcms->node_mem = g_malloc0(pcms->numa_nodes * - sizeof *pcms->node_mem); - for (i = 0; i < nb_numa_nodes; i++) { - pcms->node_mem[i] = numa_info[i].node_mem; - } - - pcms->node_cpu = g_malloc0(pcms->apic_id_limit * - sizeof *pcms->node_cpu); - - for (i = 0; i < max_cpus; i++) { - unsigned int apic_id = x86_cpu_apic_id_from_index(i); - assert(apic_id < pcms->apic_id_limit); - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { - pcms->node_cpu[apic_id] = j; - break; - } - } - } - - pcms->machine_done.notify = pc_machine_done; - qemu_add_machine_init_done_notifier(&pcms->machine_done); -} - -/* setup pci memory address space mapping into system address space */ -void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, - MemoryRegion *pci_address_space) -{ - /* Set to lower priority than RAM */ - memory_region_add_subregion_overlap(system_memory, 0x0, - pci_address_space, -1); -} - -void pc_acpi_init(const char *default_dsdt) -{ - char *filename; - - if (acpi_tables != NULL) { - /* manually set via -acpitable, leave it alone */ - return; - } - - filename 
= qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt); - if (filename == NULL) { - fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt); - } else { - QemuOpts *opts = qemu_opts_create(qemu_find_opts("acpi"), NULL, 0, - &error_abort); - Error *err = NULL; - - qemu_opt_set(opts, "file", filename, &error_abort); - - acpi_table_add_builtin(opts, &err); - if (err) { - error_reportf_err(err, "WARNING: failed to load %s: ", - filename); - } - g_free(filename); - } -} - -void xen_load_linux(PCMachineState *pcms) -{ - int i; - FWCfgState *fw_cfg; - - assert(MACHINE(pcms)->kernel_filename != NULL); - - fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); - rom_set_fw(fw_cfg); - - load_linux(pcms, fw_cfg); - for (i = 0; i < nb_option_roms; i++) { - assert(!strcmp(option_rom[i].name, "linuxboot.bin") || - !strcmp(option_rom[i].name, "multiboot.bin")); - rom_add_option(option_rom[i].name, option_rom[i].bootindex); - } - pcms->fw_cfg = fw_cfg; -} - -void pc_memory_init(PCMachineState *pcms, - MemoryRegion *system_memory, - MemoryRegion *rom_memory, - MemoryRegion **ram_memory) -{ - int linux_boot, i; - MemoryRegion *ram, *option_rom_mr; - MemoryRegion *ram_below_4g, *ram_above_4g; - FWCfgState *fw_cfg; - MachineState *machine = MACHINE(pcms); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - - assert(machine->ram_size == pcms->below_4g_mem_size + - pcms->above_4g_mem_size); - - linux_boot = (machine->kernel_filename != NULL); - - /* Allocate RAM. We allocate it as a single memory region and use - * aliases to address portions of it, mostly for backwards compatibility - * with older qemus that used qemu_ram_alloc(). 
- */ - ram = g_malloc(sizeof(*ram)); - memory_region_allocate_system_memory(ram, NULL, "pc.ram", - machine->ram_size); - *ram_memory = ram; - ram_below_4g = g_malloc(sizeof(*ram_below_4g)); - memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, - 0, pcms->below_4g_mem_size); - memory_region_add_subregion(system_memory, 0, ram_below_4g); - e820_add_entry(0, pcms->below_4g_mem_size, E820_RAM); - if (pcms->above_4g_mem_size > 0) { - ram_above_4g = g_malloc(sizeof(*ram_above_4g)); - memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, - pcms->below_4g_mem_size, - pcms->above_4g_mem_size); - memory_region_add_subregion(system_memory, 0x100000000ULL, - ram_above_4g); - e820_add_entry(0x100000000ULL, pcms->above_4g_mem_size, E820_RAM); - } - - if (!pcmc->has_reserved_memory && - (machine->ram_slots || - (machine->maxram_size > machine->ram_size))) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - - error_report("\"-memory 'slots|maxmem'\" is not supported by: %s", - mc->name); - exit(EXIT_FAILURE); - } - - /* initialize hotplug memory address space */ - if (pcmc->has_reserved_memory && - (machine->ram_size < machine->maxram_size)) { - ram_addr_t hotplug_mem_size = - machine->maxram_size - machine->ram_size; - - if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { - error_report("unsupported amount of memory slots: %"PRIu64, - machine->ram_slots); - exit(EXIT_FAILURE); - } - - if (QEMU_ALIGN_UP(machine->maxram_size, - TARGET_PAGE_SIZE) != machine->maxram_size) { - error_report("maximum memory size must by aligned to multiple of " - "%d bytes", TARGET_PAGE_SIZE); - exit(EXIT_FAILURE); - } - - pcms->hotplug_memory.base = - ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 1ULL << 30); - - if (pcmc->enforce_aligned_dimm) { - /* size hotplug region assuming 1G page max alignment per slot */ - hotplug_mem_size += (1ULL << 30) * machine->ram_slots; - } - - if ((pcms->hotplug_memory.base + hotplug_mem_size) < - hotplug_mem_size) { - 
error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, - machine->maxram_size); - exit(EXIT_FAILURE); - } - - memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms), - "hotplug-memory", hotplug_mem_size); - memory_region_add_subregion(system_memory, pcms->hotplug_memory.base, - &pcms->hotplug_memory.mr); - } - - /* Initialize PC system firmware */ - pc_system_firmware_init(rom_memory, !pcmc->pci_enabled); - - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - vmstate_register_ram_global(option_rom_mr); - memory_region_add_subregion_overlap(rom_memory, - PC_ROM_MIN_VGA, - option_rom_mr, - 1); - - fw_cfg = bochs_bios_init(&address_space_memory, pcms); - - rom_set_fw(fw_cfg); - - if (pcmc->has_reserved_memory && pcms->hotplug_memory.base) { - uint64_t *val = g_malloc(sizeof(*val)); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - uint64_t res_mem_end = pcms->hotplug_memory.base; - - if (!pcmc->broken_reserved_end) { - res_mem_end += memory_region_size(&pcms->hotplug_memory.mr); - } - *val = cpu_to_le64(ROUND_UP(res_mem_end, 0x1ULL << 30)); - fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); - } - - if (linux_boot) { - load_linux(pcms, fw_cfg); - } - - for (i = 0; i < nb_option_roms; i++) { - rom_add_option(option_rom[i].name, option_rom[i].bootindex); - } - pcms->fw_cfg = fw_cfg; -} - -qemu_irq pc_allocate_cpu_irq(void) -{ - return qemu_allocate_irq(pic_irq_request, NULL, 0); -} - -DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) -{ - DeviceState *dev = NULL; - - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA); - if (pci_bus) { - PCIDevice *pcidev = pci_vga_init(pci_bus); - dev = pcidev ? &pcidev->qdev : NULL; - } else if (isa_bus) { - ISADevice *isadev = isa_vga_init(isa_bus); - dev = isadev ? 
DEVICE(isadev) : NULL; - } - rom_reset_order_override(); - return dev; -} - -static const MemoryRegionOps ioport80_io_ops = { - .write = ioport80_write, - .read = ioport80_read, - .endianness = DEVICE_NATIVE_ENDIAN, - .impl = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - -static const MemoryRegionOps ioportF0_io_ops = { - .write = ioportF0_write, - .read = ioportF0_read, - .endianness = DEVICE_NATIVE_ENDIAN, - .impl = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - -void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, - ISADevice **rtc_state, - bool create_fdctrl, - bool no_vmport, - uint32_t hpet_irqs) -{ - int i; - DriveInfo *fd[MAX_FD]; - DeviceState *hpet = NULL; - int pit_isa_irq = 0; - qemu_irq pit_alt_irq = NULL; - qemu_irq rtc_irq = NULL; - qemu_irq *a20_line; - ISADevice *i8042, *port92, *vmmouse, *pit = NULL; - MemoryRegion *ioport80_io = g_new(MemoryRegion, 1); - MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1); - - memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1); - memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io); - - memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1); - memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io); - - /* - * Check if an HPET shall be created. - * - * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT - * when the HPET wants to take over. Thus we have to disable the latter. - */ - if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) { - /* In order to set property, here not using sysbus_try_create_simple */ - hpet = qdev_try_create(NULL, TYPE_HPET); - if (hpet) { - /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7 - * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23, - * IRQ8 and IRQ2. 
- */ - uint8_t compat = object_property_get_int(OBJECT(hpet), - HPET_INTCAP, NULL); - if (!compat) { - qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs); - } - qdev_init_nofail(hpet); - sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE); - - for (i = 0; i < GSI_NUM_PINS; i++) { - sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]); - } - pit_isa_irq = -1; - pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); - rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); - } - } - *rtc_state = rtc_init(isa_bus, 2000, rtc_irq); - - qemu_register_boot_set(pc_boot_set, *rtc_state); - - if (!xen_enabled()) { - if (kvm_pit_in_kernel()) { - pit = kvm_pit_init(isa_bus, 0x40); - } else { - pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq); - } - if (hpet) { - /* connect PIT to output control line of the HPET */ - qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0)); - } - pcspk_init(isa_bus, pit); - } - - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); - parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); - - a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2); - i8042 = isa_create_simple(isa_bus, "i8042"); - i8042_setup_a20_line(i8042, &a20_line[0]); - if (!no_vmport) { - vmport_init(isa_bus); - vmmouse = isa_try_create(isa_bus, "vmmouse"); - } else { - vmmouse = NULL; - } - if (vmmouse) { - DeviceState *dev = DEVICE(vmmouse); - qdev_prop_set_ptr(dev, "ps2_mouse", i8042); - qdev_init_nofail(dev); - } - port92 = isa_create_simple(isa_bus, "port92"); - port92_init(port92, &a20_line[1]); - - DMA_init(isa_bus, 0); - - for(i = 0; i < MAX_FD; i++) { - fd[i] = drive_get(IF_FLOPPY, 0, i); - create_fdctrl |= !!fd[i]; - } - if (create_fdctrl) { - fdctrl_init_isa(isa_bus, fd); - } -} - -void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus) -{ - int i; - - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC); - for (i = 0; i < nb_nics; i++) { - NICInfo *nd = &nd_table[i]; - - if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) { - 
pc_init_ne2k_isa(isa_bus, nd); - } else { - pci_nic_init_nofail(nd, pci_bus, "e1000", NULL); - } - } - rom_reset_order_override(); -} - -void pc_pci_device_init(PCIBus *pci_bus) -{ - int max_bus; - int bus; - - max_bus = drive_get_max_bus(IF_SCSI); - for (bus = 0; bus <= max_bus; bus++) { - pci_create_simple(pci_bus, -1, "lsi53c895a"); - } -} - -void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) -{ - DeviceState *dev; - SysBusDevice *d; - unsigned int i; - - if (kvm_ioapic_in_kernel()) { - dev = qdev_create(NULL, "kvm-ioapic"); - } else { - dev = qdev_create(NULL, "ioapic"); - } - if (parent_name) { - object_property_add_child(object_resolve_path(parent_name, NULL), - "ioapic", OBJECT(dev), NULL); - } - qdev_init_nofail(dev); - d = SYS_BUS_DEVICE(dev); - sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); - } -} - -static void pc_dimm_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - HotplugHandlerClass *hhc; - Error *local_err = NULL; - PCMachineState *pcms = PC_MACHINE(hotplug_dev); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - PCDIMMDevice *dimm = PC_DIMM(dev); - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - uint64_t align = TARGET_PAGE_SIZE; - - if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) { - align = memory_region_get_alignment(mr); - } - - if (!pcms->acpi_dev) { - error_setg(&local_err, - "memory hotplug is not enabled: missing acpi device"); - goto out; - } - - pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); - if (local_err) { - goto out; - } - - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); -out: - error_propagate(errp, local_err); -} - -static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - 
HotplugHandlerClass *hhc; - Error *local_err = NULL; - PCMachineState *pcms = PC_MACHINE(hotplug_dev); - - if (!pcms->acpi_dev) { - error_setg(&local_err, - "memory hotplug is not enabled: missing acpi device"); - goto out; - } - - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - -out: - error_propagate(errp, local_err); -} - -static void pc_dimm_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(hotplug_dev); - PCDIMMDevice *dimm = PC_DIMM(dev); - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *mr = ddc->get_memory_region(dimm); - HotplugHandlerClass *hhc; - Error *local_err = NULL; - - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - - if (local_err) { - goto out; - } - - pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr); - object_unparent(OBJECT(dev)); - - out: - error_propagate(errp, local_err); -} - -static int pc_apic_cmp(const void *a, const void *b) -{ - CPUArchId *apic_a = (CPUArchId *)a; - CPUArchId *apic_b = (CPUArchId *)b; - - return apic_a->arch_id - apic_b->arch_id; -} - -static void pc_cpu_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - CPUClass *cc = CPU_GET_CLASS(dev); - CPUArchId apic_id, *found_cpu; - HotplugHandlerClass *hhc; - Error *local_err = NULL; - PCMachineState *pcms = PC_MACHINE(hotplug_dev); - - if (!dev->hotplugged) { - goto out; - } - - if (!pcms->acpi_dev) { - error_setg(&local_err, - "cpu hotplug is not enabled: missing acpi device"); - goto out; - } - - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - if (local_err) { - goto out; - } - - /* increment the number of CPUs */ - rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); - - apic_id.arch_id = cc->get_arch_id(CPU(dev)); - found_cpu = bsearch(&apic_id, 
pcms->possible_cpus->cpus, - pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus), - pc_apic_cmp); - assert(found_cpu); - found_cpu->cpu = CPU(dev); -out: - error_propagate(errp, local_err); -} - -static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - pc_dimm_plug(hotplug_dev, dev, errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { - pc_cpu_plug(hotplug_dev, dev, errp); - } -} - -static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - pc_dimm_unplug_request(hotplug_dev, dev, errp); - } else { - error_setg(errp, "acpi: device unplug request for not supported device" - " type: %s", object_get_typename(OBJECT(dev))); - } -} - -static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - pc_dimm_unplug(hotplug_dev, dev, errp); - } else { - error_setg(errp, "acpi: device unplug for not supported device" - " type: %s", object_get_typename(OBJECT(dev))); - } -} - -static HotplugHandler *pc_get_hotpug_handler(MachineState *machine, - DeviceState *dev) -{ - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { - return HOTPLUG_HANDLER(machine); - } - - return pcmc->get_hotplug_handler ? 
- pcmc->get_hotplug_handler(machine, dev) : NULL; -} - -static void -pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - int64_t value = memory_region_size(&pcms->hotplug_memory.mr); - - visit_type_int(v, name, &value, errp); -} - -static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - uint64_t value = pcms->max_ram_below_4g; - - visit_type_size(v, name, &value, errp); -} - -static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - Error *error = NULL; - uint64_t value; - - visit_type_size(v, name, &value, &error); - if (error) { - error_propagate(errp, error); - return; - } - if (value > (1ULL << 32)) { - error_setg(&error, - "Machine option 'max-ram-below-4g=%"PRIu64 - "' expects size less than or equal to 4G", value); - error_propagate(errp, error); - return; - } - - if (value < (1ULL << 20)) { - error_report("Warning: small max_ram_below_4g(%"PRIu64 - ") less than 1M. 
BIOS may not work..", - value); - } - - pcms->max_ram_below_4g = value; -} - -static void pc_machine_get_vmport(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - OnOffAuto vmport = pcms->vmport; - - visit_type_OnOffAuto(v, name, &vmport, errp); -} - -static void pc_machine_set_vmport(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &pcms->vmport, errp); -} - -bool pc_machine_is_smm_enabled(PCMachineState *pcms) -{ - bool smm_available = false; - - if (pcms->smm == ON_OFF_AUTO_OFF) { - return false; - } - - if (tcg_enabled() || qtest_enabled()) { - smm_available = true; - } else if (kvm_enabled()) { - smm_available = kvm_has_smm(); - } - - if (smm_available) { - return true; - } - - if (pcms->smm == ON_OFF_AUTO_ON) { - error_report("System Management Mode not supported by this hypervisor."); - exit(1); - } - return false; -} - -static void pc_machine_get_smm(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - OnOffAuto smm = pcms->smm; - - visit_type_OnOffAuto(v, name, &smm, errp); -} - -static void pc_machine_set_smm(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - visit_type_OnOffAuto(v, name, &pcms->smm, errp); -} - -static bool pc_machine_get_nvdimm(Object *obj, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - return pcms->acpi_nvdimm_state.is_enabled; -} - -static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - pcms->acpi_nvdimm_state.is_enabled = value; -} - -static void pc_machine_initfn(Object *obj) -{ - PCMachineState *pcms = PC_MACHINE(obj); - - object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", - pc_machine_get_hotplug_memory_region_size, - NULL, NULL, 
NULL, &error_abort); - - pcms->max_ram_below_4g = 1ULL << 32; /* 4G */ - object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size", - pc_machine_get_max_ram_below_4g, - pc_machine_set_max_ram_below_4g, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G, - "Maximum ram below the 4G boundary (32bit boundary)", - &error_abort); - - pcms->smm = ON_OFF_AUTO_AUTO; - object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto", - pc_machine_get_smm, - pc_machine_set_smm, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_SMM, - "Enable SMM (pc & q35)", - &error_abort); - - pcms->vmport = ON_OFF_AUTO_AUTO; - object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto", - pc_machine_get_vmport, - pc_machine_set_vmport, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_VMPORT, - "Enable vmport (pc & q35)", - &error_abort); - - /* nvdimm is disabled on default. */ - pcms->acpi_nvdimm_state.is_enabled = false; - object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, - pc_machine_set_nvdimm, &error_abort); -} - -static void pc_machine_reset(void) -{ - CPUState *cs; - X86CPU *cpu; - - qemu_devices_reset(); - - /* Reset APIC after devices have been reset to cancel - * any changes that qemu_devices_reset() might have done. 
- */ - CPU_FOREACH(cs) { - cpu = X86_CPU(cs); - - if (cpu->apic_state) { - device_reset(cpu->apic_state); - } - } -} - -static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) -{ - X86CPUTopoInfo topo; - x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, - &topo); - return topo.pkg_id; -} - -static CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine) -{ - PCMachineState *pcms = PC_MACHINE(machine); - int len = sizeof(CPUArchIdList) + - sizeof(CPUArchId) * (pcms->possible_cpus->len); - CPUArchIdList *list = g_malloc(len); - - memcpy(list, pcms->possible_cpus, len); - return list; -} - -static void pc_machine_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); - HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); - - pcmc->get_hotplug_handler = mc->get_hotplug_handler; - pcmc->pci_enabled = true; - pcmc->has_acpi_build = true; - pcmc->rsdp_in_ram = true; - pcmc->smbios_defaults = true; - pcmc->smbios_uuid_encoded = true; - pcmc->gigabyte_align = true; - pcmc->has_reserved_memory = true; - pcmc->kvmclock_enabled = true; - pcmc->enforce_aligned_dimm = true; - /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported - * to be used at the moment, 32K should be enough for a while. 
*/ - pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->save_tsc_khz = true; - mc->get_hotplug_handler = pc_get_hotpug_handler; - mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; - mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; - mc->default_boot_order = "cad"; - mc->hot_add_cpu = pc_hot_add_cpu; - mc->max_cpus = 255; - mc->reset = pc_machine_reset; - hc->plug = pc_machine_device_plug_cb; - hc->unplug_request = pc_machine_device_unplug_request_cb; - hc->unplug = pc_machine_device_unplug_cb; -} - -static const TypeInfo pc_machine_info = { - .name = TYPE_PC_MACHINE, - .parent = TYPE_MACHINE, - .abstract = true, - .instance_size = sizeof(PCMachineState), - .instance_init = pc_machine_initfn, - .class_size = sizeof(PCMachineClass), - .class_init = pc_machine_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_HOTPLUG_HANDLER }, - { } - }, -}; - -static void pc_machine_register_types(void) -{ - type_register_static(&pc_machine_info); -} - -type_init(pc_machine_register_types) diff --git a/qemu/hw/i386/pc_piix.c b/qemu/hw/i386/pc_piix.c deleted file mode 100644 index 7f50116bc..000000000 --- a/qemu/hw/i386/pc_piix.c +++ /dev/null @@ -1,1062 +0,0 @@ -/* - * QEMU PC System Emulator - * - * Copyright (c) 2003-2004 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include <glib.h> - -#include "hw/hw.h" -#include "hw/loader.h" -#include "hw/i386/pc.h" -#include "hw/i386/apic.h" -#include "hw/smbios/smbios.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci_ids.h" -#include "hw/usb.h" -#include "net/net.h" -#include "hw/boards.h" -#include "hw/ide.h" -#include "sysemu/kvm.h" -#include "hw/kvm/clock.h" -#include "sysemu/sysemu.h" -#include "hw/sysbus.h" -#include "sysemu/arch_init.h" -#include "sysemu/block-backend.h" -#include "hw/i2c/smbus.h" -#include "hw/xen/xen.h" -#include "exec/memory.h" -#include "exec/address-spaces.h" -#include "hw/acpi/acpi.h" -#include "cpu.h" -#include "qemu/error-report.h" -#ifdef CONFIG_XEN -#include <xen/hvm/hvm_info_table.h> -#include "hw/xen/xen_pt.h" -#endif -#include "migration/migration.h" -#include "kvm_i386.h" - -#define MAX_IDE_BUS 2 - -static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 }; -static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; -static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; - -/* PC hardware initialisation */ -static void pc_init1(MachineState *machine, - const char *host_type, const char *pci_type) -{ - PCMachineState *pcms = PC_MACHINE(machine); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *system_io = get_system_io(); - int i; - PCIBus *pci_bus; - ISABus *isa_bus; - PCII440FXState *i440fx_state; - int piix3_devfn = -1; - qemu_irq *gsi; - qemu_irq *i8259; - 
qemu_irq smi_irq; - GSIState *gsi_state; - DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; - BusState *idebus[MAX_IDE_BUS]; - ISADevice *rtc_state; - MemoryRegion *ram_memory; - MemoryRegion *pci_memory; - MemoryRegion *rom_memory; - ram_addr_t lowmem; - - /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory). - * If it doesn't, we need to split it in chunks below and above 4G. - * In any case, try to make sure that guest addresses aligned at - * 1G boundaries get mapped to host addresses aligned at 1G boundaries. - * For old machine types, use whatever split we used historically to avoid - * breaking migration. - */ - if (machine->ram_size >= 0xe0000000) { - lowmem = pcmc->gigabyte_align ? 0xc0000000 : 0xe0000000; - } else { - lowmem = 0xe0000000; - } - - /* Handle the machine opt max-ram-below-4g. It is basically doing - * min(qemu limit, user limit). - */ - if (lowmem > pcms->max_ram_below_4g) { - lowmem = pcms->max_ram_below_4g; - if (machine->ram_size - lowmem > lowmem && - lowmem & ((1ULL << 30) - 1)) { - error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 - ") not a multiple of 1G; possible bad performance.", - pcms->max_ram_below_4g); - } - } - - if (machine->ram_size >= lowmem) { - pcms->above_4g_mem_size = machine->ram_size - lowmem; - pcms->below_4g_mem_size = lowmem; - } else { - pcms->above_4g_mem_size = 0; - pcms->below_4g_mem_size = machine->ram_size; - } - - if (xen_enabled()) { - xen_hvm_init(pcms, &ram_memory); - } - - pc_cpus_init(pcms); - - if (kvm_enabled() && pcmc->kvmclock_enabled) { - kvmclock_create(); - } - - if (pcmc->pci_enabled) { - pci_memory = g_new(MemoryRegion, 1); - memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); - rom_memory = pci_memory; - } else { - pci_memory = NULL; - rom_memory = system_memory; - } - - pc_guest_info_init(pcms); - - if (pcmc->smbios_defaults) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - /* These values are guest ABI, do not change */ - smbios_set_defaults("QEMU", 
"Standard PC (i440FX + PIIX, 1996)", - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } - - /* allocate ram and load rom/bios */ - if (!xen_enabled()) { - pc_memory_init(pcms, system_memory, - rom_memory, &ram_memory); - } else if (machine->kernel_filename != NULL) { - /* For xen HVM direct kernel boot, load linux here */ - xen_load_linux(pcms); - } - - gsi_state = g_malloc0(sizeof(*gsi_state)); - if (kvm_ioapic_in_kernel()) { - kvm_pc_setup_irq_routing(pcmc->pci_enabled); - gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, - GSI_NUM_PINS); - } else { - gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); - } - - if (pcmc->pci_enabled) { - pci_bus = i440fx_init(host_type, - pci_type, - &i440fx_state, &piix3_devfn, &isa_bus, gsi, - system_memory, system_io, machine->ram_size, - pcms->below_4g_mem_size, - pcms->above_4g_mem_size, - pci_memory, ram_memory); - pcms->bus = pci_bus; - } else { - pci_bus = NULL; - i440fx_state = NULL; - isa_bus = isa_bus_new(NULL, get_system_memory(), system_io, - &error_abort); - no_hpet = 1; - } - isa_bus_irqs(isa_bus, gsi); - - if (kvm_pic_in_kernel()) { - i8259 = kvm_i8259_init(isa_bus); - } else if (xen_enabled()) { - i8259 = xen_interrupt_controller_init(); - } else { - i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq()); - } - - for (i = 0; i < ISA_NUM_IRQS; i++) { - gsi_state->i8259_irq[i] = i8259[i]; - } - g_free(i8259); - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "i440fx"); - } - - pc_register_ferr_irq(gsi[13]); - - pc_vga_init(isa_bus, pcmc->pci_enabled ? pci_bus : NULL); - - assert(pcms->vmport != ON_OFF_AUTO__MAX); - if (pcms->vmport == ON_OFF_AUTO_AUTO) { - pcms->vmport = xen_enabled() ? 
ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; - } - - /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, true, - (pcms->vmport != ON_OFF_AUTO_ON), 0x4); - - pc_nic_init(isa_bus, pci_bus); - - ide_drive_get(hd, ARRAY_SIZE(hd)); - if (pcmc->pci_enabled) { - PCIDevice *dev; - if (xen_enabled()) { - dev = pci_piix3_xen_ide_init(pci_bus, hd, piix3_devfn + 1); - } else { - dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1); - } - idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0"); - idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1"); - } else { - for(i = 0; i < MAX_IDE_BUS; i++) { - ISADevice *dev; - char busname[] = "ide.0"; - dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], - ide_irq[i], - hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]); - /* - * The ide bus name is ide.0 for the first bus and ide.1 for the - * second one. - */ - busname[4] = '0' + i; - idebus[i] = qdev_get_child_bus(DEVICE(dev), busname); - } - } - - pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); - - if (pcmc->pci_enabled && usb_enabled()) { - pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci"); - } - - if (pcmc->pci_enabled && acpi_enabled) { - DeviceState *piix4_pm; - I2CBus *smbus; - - smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); - /* TODO: Populate SPD eeprom data. 
*/ - smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, - gsi[9], smi_irq, - pc_machine_is_smm_enabled(pcms), - &piix4_pm); - smbus_eeprom_init(smbus, 8, NULL, 0); - - object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, - TYPE_HOTPLUG_HANDLER, - (Object **)&pcms->acpi_dev, - object_property_allow_set_link, - OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); - object_property_set_link(OBJECT(machine), OBJECT(piix4_pm), - PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); - } - - if (pcmc->pci_enabled) { - pc_pci_device_init(pci_bus); - } - - if (pcms->acpi_nvdimm_state.is_enabled) { - nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, - pcms->fw_cfg, OBJECT(pcms)); - } -} - -/* Looking for a pc_compat_2_4() function? It doesn't exist. - * pc_compat_*() functions that run on machine-init time and - * change global QEMU state are deprecated. Please don't create - * one, and implement any pc-*-2.4 (and newer) compat code in - * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
- */ - -static void pc_compat_2_3(MachineState *machine) -{ - PCMachineState *pcms = PC_MACHINE(machine); - savevm_skip_section_footers(); - if (kvm_enabled()) { - pcms->smm = ON_OFF_AUTO_OFF; - } - global_state_set_optional(); - savevm_skip_configuration(); -} - -static void pc_compat_2_2(MachineState *machine) -{ - pc_compat_2_3(machine); - machine->suppress_vmdesc = true; -} - -static void pc_compat_2_1(MachineState *machine) -{ - pc_compat_2_2(machine); - x86_cpu_change_kvm_default("svm", NULL); -} - -static void pc_compat_2_0(MachineState *machine) -{ - pc_compat_2_1(machine); -} - -static void pc_compat_1_7(MachineState *machine) -{ - pc_compat_2_0(machine); - x86_cpu_change_kvm_default("x2apic", NULL); -} - -static void pc_compat_1_6(MachineState *machine) -{ - pc_compat_1_7(machine); -} - -static void pc_compat_1_5(MachineState *machine) -{ - pc_compat_1_6(machine); -} - -static void pc_compat_1_4(MachineState *machine) -{ - pc_compat_1_5(machine); -} - -static void pc_compat_1_3(MachineState *machine) -{ - pc_compat_1_4(machine); - enable_compat_apic_id_mode(); -} - -/* PC compat function for pc-0.14 to pc-1.2 */ -static void pc_compat_1_2(MachineState *machine) -{ - pc_compat_1_3(machine); - x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); -} - -/* PC compat function for pc-0.10 to pc-0.13 */ -static void pc_compat_0_13(MachineState *machine) -{ - pc_compat_1_2(machine); -} - -static void pc_init_isa(MachineState *machine) -{ - if (!machine->cpu_model) { - machine->cpu_model = "486"; - } - x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); - enable_compat_apic_id_mode(); - pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); -} - -#ifdef CONFIG_XEN -static void pc_xen_hvm_init_pci(MachineState *machine) -{ - const char *pci_type = has_igd_gfx_passthru ? 
- TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE : TYPE_I440FX_PCI_DEVICE; - - pc_init1(machine, - TYPE_I440FX_PCI_HOST_BRIDGE, - pci_type); -} - -static void pc_xen_hvm_init(MachineState *machine) -{ - PCIBus *bus; - - if (!xen_enabled()) { - error_report("xenfv machine requires the xen accelerator"); - exit(1); - } - - pc_xen_hvm_init_pci(machine); - - bus = pci_find_primary_bus(); - if (bus != NULL) { - pci_create_simple(bus, -1, "xen-platform"); - } -} -#endif - -#define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ - static void pc_init_##suffix(MachineState *machine) \ - { \ - void (*compat)(MachineState *m) = (compatfn); \ - if (compat) { \ - compat(machine); \ - } \ - pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ - TYPE_I440FX_PCI_DEVICE); \ - } \ - DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) - -static void pc_i440fx_machine_options(MachineClass *m) -{ - m->family = "pc_piix"; - m->desc = "Standard PC (i440FX + PIIX, 1996)"; - m->hot_add_cpu = pc_hot_add_cpu; - m->default_machine_opts = "firmware=bios-256k.bin"; - m->default_display = "std"; -} - -static void pc_i440fx_2_6_machine_options(MachineClass *m) -{ - pc_i440fx_machine_options(m); - m->alias = "pc"; - m->is_default = 1; -} - -DEFINE_I440FX_MACHINE(v2_6, "pc-i440fx-2.6", NULL, - pc_i440fx_2_6_machine_options); - - -static void pc_i440fx_2_5_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_6_machine_options(m); - m->alias = NULL; - m->is_default = 0; - pcmc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); -} - -DEFINE_I440FX_MACHINE(v2_5, "pc-i440fx-2.5", NULL, - pc_i440fx_2_5_machine_options); - - -static void pc_i440fx_2_4_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_5_machine_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); -} - -DEFINE_I440FX_MACHINE(v2_4, 
"pc-i440fx-2.4", NULL, - pc_i440fx_2_4_machine_options) - - -static void pc_i440fx_2_3_machine_options(MachineClass *m) -{ - pc_i440fx_2_4_machine_options(m); - m->hw_version = "2.3.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); -} - -DEFINE_I440FX_MACHINE(v2_3, "pc-i440fx-2.3", pc_compat_2_3, - pc_i440fx_2_3_machine_options); - - -static void pc_i440fx_2_2_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_3_machine_options(m); - m->hw_version = "2.2.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_2); - pcmc->rsdp_in_ram = false; -} - -DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2, - pc_i440fx_2_2_machine_options); - - -static void pc_i440fx_2_1_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_2_machine_options(m); - m->hw_version = "2.1.0"; - m->default_display = NULL; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_1); - pcmc->smbios_uuid_encoded = false; - pcmc->enforce_aligned_dimm = false; -} - -DEFINE_I440FX_MACHINE(v2_1, "pc-i440fx-2.1", pc_compat_2_1, - pc_i440fx_2_1_machine_options); - - - -static void pc_i440fx_2_0_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_1_machine_options(m); - m->hw_version = "2.0.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_0); - pcmc->smbios_legacy_mode = true; - pcmc->has_reserved_memory = false; - /* This value depends on the actual DSDT and SSDT compiled into - * the source QEMU; unfortunately it depends on the binary and - * not on the machine type, so we cannot make pc-i440fx-1.7 work on - * both QEMU 1.7 and QEMU 2.0. - * - * Large variations cause migration to fail for more than one - * consecutive value of the "-smp" maxcpus option. - * - * For small variations of the kind caused by different iasl versions, - * the 4k rounding usually leaves slack. However, there could be still - * one or two values that break. 
For QEMU 1.7 and QEMU 2.0 the - * slack is only ~10 bytes before one "-smp maxcpus" value breaks! - * - * 6652 is valid for QEMU 2.0, the right value for pc-i440fx-1.7 on - * QEMU 1.7 it is 6414. For RHEL/CentOS 7.0 it is 6418. - */ - pcmc->legacy_acpi_table_size = 6652; - pcmc->acpi_data_size = 0x10000; -} - -DEFINE_I440FX_MACHINE(v2_0, "pc-i440fx-2.0", pc_compat_2_0, - pc_i440fx_2_0_machine_options); - - -static void pc_i440fx_1_7_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_2_0_machine_options(m); - m->hw_version = "1.7.0"; - m->default_machine_opts = NULL; - m->option_rom_has_mr = true; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_7); - pcmc->smbios_defaults = false; - pcmc->gigabyte_align = false; - pcmc->legacy_acpi_table_size = 6414; -} - -DEFINE_I440FX_MACHINE(v1_7, "pc-i440fx-1.7", pc_compat_1_7, - pc_i440fx_1_7_machine_options); - - -static void pc_i440fx_1_6_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_1_7_machine_options(m); - m->hw_version = "1.6.0"; - m->rom_file_has_mr = false; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_6); - pcmc->has_acpi_build = false; -} - -DEFINE_I440FX_MACHINE(v1_6, "pc-i440fx-1.6", pc_compat_1_6, - pc_i440fx_1_6_machine_options); - - -static void pc_i440fx_1_5_machine_options(MachineClass *m) -{ - pc_i440fx_1_6_machine_options(m); - m->hw_version = "1.5.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_5); -} - -DEFINE_I440FX_MACHINE(v1_5, "pc-i440fx-1.5", pc_compat_1_5, - pc_i440fx_1_5_machine_options); - - -static void pc_i440fx_1_4_machine_options(MachineClass *m) -{ - pc_i440fx_1_5_machine_options(m); - m->hw_version = "1.4.0"; - m->hot_add_cpu = NULL; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_4); -} - -DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4, - pc_i440fx_1_4_machine_options); - - -#define PC_COMPAT_1_3 \ - PC_COMPAT_1_4 \ - {\ - .driver = "usb-tablet",\ - .property = "usb_version",\ - .value = stringify(1),\ - },{\ - .driver = 
"virtio-net-pci",\ - .property = "ctrl_mac_addr",\ - .value = "off", \ - },{ \ - .driver = "virtio-net-pci", \ - .property = "mq", \ - .value = "off", \ - }, {\ - .driver = "e1000",\ - .property = "autonegotiation",\ - .value = "off",\ - }, - - -static void pc_i440fx_1_3_machine_options(MachineClass *m) -{ - pc_i440fx_1_4_machine_options(m); - m->hw_version = "1.3.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_3); -} - -DEFINE_I440FX_MACHINE(v1_3, "pc-1.3", pc_compat_1_3, - pc_i440fx_1_3_machine_options); - - -#define PC_COMPAT_1_2 \ - PC_COMPAT_1_3 \ - {\ - .driver = "nec-usb-xhci",\ - .property = "msi",\ - .value = "off",\ - },{\ - .driver = "nec-usb-xhci",\ - .property = "msix",\ - .value = "off",\ - },{\ - .driver = "ivshmem",\ - .property = "use64",\ - .value = "0",\ - },{\ - .driver = "qxl",\ - .property = "revision",\ - .value = stringify(3),\ - },{\ - .driver = "qxl-vga",\ - .property = "revision",\ - .value = stringify(3),\ - },{\ - .driver = "VGA",\ - .property = "mmio",\ - .value = "off",\ - }, - -static void pc_i440fx_1_2_machine_options(MachineClass *m) -{ - pc_i440fx_1_3_machine_options(m); - m->hw_version = "1.2.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_2); -} - -DEFINE_I440FX_MACHINE(v1_2, "pc-1.2", pc_compat_1_2, - pc_i440fx_1_2_machine_options); - - -#define PC_COMPAT_1_1 \ - PC_COMPAT_1_2 \ - {\ - .driver = "virtio-scsi-pci",\ - .property = "hotplug",\ - .value = "off",\ - },{\ - .driver = "virtio-scsi-pci",\ - .property = "param_change",\ - .value = "off",\ - },{\ - .driver = "VGA",\ - .property = "vgamem_mb",\ - .value = stringify(8),\ - },{\ - .driver = "vmware-svga",\ - .property = "vgamem_mb",\ - .value = stringify(8),\ - },{\ - .driver = "qxl-vga",\ - .property = "vgamem_mb",\ - .value = stringify(8),\ - },{\ - .driver = "qxl",\ - .property = "vgamem_mb",\ - .value = stringify(8),\ - },{\ - .driver = "virtio-blk-pci",\ - .property = "config-wce",\ - .value = "off",\ - }, - -static void pc_i440fx_1_1_machine_options(MachineClass *m) -{ - 
pc_i440fx_1_2_machine_options(m); - m->hw_version = "1.1.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_1); -} - -DEFINE_I440FX_MACHINE(v1_1, "pc-1.1", pc_compat_1_2, - pc_i440fx_1_1_machine_options); - - -#define PC_COMPAT_1_0 \ - PC_COMPAT_1_1 \ - {\ - .driver = TYPE_ISA_FDC,\ - .property = "check_media_rate",\ - .value = "off",\ - }, {\ - .driver = "virtio-balloon-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_MEMORY_RAM),\ - },{\ - .driver = "apic-common",\ - .property = "vapic",\ - .value = "off",\ - },{\ - .driver = TYPE_USB_DEVICE,\ - .property = "full-path",\ - .value = "no",\ - }, - -static void pc_i440fx_1_0_machine_options(MachineClass *m) -{ - pc_i440fx_1_1_machine_options(m); - m->hw_version = "1.0"; - SET_MACHINE_COMPAT(m, PC_COMPAT_1_0); -} - -DEFINE_I440FX_MACHINE(v1_0, "pc-1.0", pc_compat_1_2, - pc_i440fx_1_0_machine_options); - - -#define PC_COMPAT_0_15 \ - PC_COMPAT_1_0 - -static void pc_i440fx_0_15_machine_options(MachineClass *m) -{ - pc_i440fx_1_0_machine_options(m); - m->hw_version = "0.15"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_15); -} - -DEFINE_I440FX_MACHINE(v0_15, "pc-0.15", pc_compat_1_2, - pc_i440fx_0_15_machine_options); - - -#define PC_COMPAT_0_14 \ - PC_COMPAT_0_15 \ - {\ - .driver = "virtio-blk-pci",\ - .property = "event_idx",\ - .value = "off",\ - },{\ - .driver = "virtio-serial-pci",\ - .property = "event_idx",\ - .value = "off",\ - },{\ - .driver = "virtio-net-pci",\ - .property = "event_idx",\ - .value = "off",\ - },{\ - .driver = "virtio-balloon-pci",\ - .property = "event_idx",\ - .value = "off",\ - },{\ - .driver = "qxl",\ - .property = "revision",\ - .value = stringify(2),\ - },{\ - .driver = "qxl-vga",\ - .property = "revision",\ - .value = stringify(2),\ - }, - -static void pc_i440fx_0_14_machine_options(MachineClass *m) -{ - pc_i440fx_0_15_machine_options(m); - m->hw_version = "0.14"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_14); -} - -DEFINE_I440FX_MACHINE(v0_14, "pc-0.14", pc_compat_1_2, - 
pc_i440fx_0_14_machine_options); - - -#define PC_COMPAT_0_13 \ - PC_COMPAT_0_14 \ - {\ - .driver = TYPE_PCI_DEVICE,\ - .property = "command_serr_enable",\ - .value = "off",\ - },{\ - .driver = "AC97",\ - .property = "use_broken_id",\ - .value = stringify(1),\ - },{\ - .driver = "virtio-9p-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = "VGA",\ - .property = "rombar",\ - .value = stringify(0),\ - },{\ - .driver = "vmware-svga",\ - .property = "rombar",\ - .value = stringify(0),\ - }, - -static void pc_i440fx_0_13_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_i440fx_0_14_machine_options(m); - m->hw_version = "0.13"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_13); - pcmc->kvmclock_enabled = false; -} - -DEFINE_I440FX_MACHINE(v0_13, "pc-0.13", pc_compat_0_13, - pc_i440fx_0_13_machine_options); - - -#define PC_COMPAT_0_12 \ - PC_COMPAT_0_13 \ - {\ - .driver = "virtio-serial-pci",\ - .property = "max_ports",\ - .value = stringify(1),\ - },{\ - .driver = "virtio-serial-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = "usb-mouse",\ - .property = "serial",\ - .value = "1",\ - },{\ - .driver = "usb-tablet",\ - .property = "serial",\ - .value = "1",\ - },{\ - .driver = "usb-kbd",\ - .property = "serial",\ - .value = "1",\ - }, - -static void pc_i440fx_0_12_machine_options(MachineClass *m) -{ - pc_i440fx_0_13_machine_options(m); - m->hw_version = "0.12"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_12); -} - -DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13, - pc_i440fx_0_12_machine_options); - - -#define PC_COMPAT_0_11 \ - PC_COMPAT_0_12 \ - {\ - .driver = "virtio-blk-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = TYPE_PCI_DEVICE,\ - .property = "rombar",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.11",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.11",\ - }, - -static void 
pc_i440fx_0_11_machine_options(MachineClass *m) -{ - pc_i440fx_0_12_machine_options(m); - m->hw_version = "0.11"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_11); -} - -DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13, - pc_i440fx_0_11_machine_options); - - -#define PC_COMPAT_0_10 \ - PC_COMPAT_0_11 \ - {\ - .driver = "virtio-blk-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_STORAGE_OTHER),\ - },{\ - .driver = "virtio-serial-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_DISPLAY_OTHER),\ - },{\ - .driver = "virtio-net-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.10",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.10",\ - }, - -static void pc_i440fx_0_10_machine_options(MachineClass *m) -{ - pc_i440fx_0_11_machine_options(m); - m->hw_version = "0.10"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_10); -} - -DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, - pc_i440fx_0_10_machine_options); - -typedef struct { - uint16_t gpu_device_id; - uint16_t pch_device_id; - uint8_t pch_revision_id; -} IGDDeviceIDInfo; - -/* In real world different GPU should have different PCH. But actually - * the different PCH DIDs likely map to different PCH SKUs. We do the - * same thing for the GPU. For PCH, the different SKUs are going to be - * all the same silicon design and implementation, just different - * features turn on and off with fuses. The SW interfaces should be - * consistent across all SKUs in a given family (eg LPT). But just same - * features may not be supported. - * - * Most of these different PCH features probably don't matter to the - * Gfx driver, but obviously any difference in display port connections - * will so it should be fine with any PCH in case of passthrough. - * - * So currently use one PCH version, 0x8c4e, to cover all HSW(Haswell) - * scenarios, 0x9cc3 for BDW(Broadwell). 
- */ -static const IGDDeviceIDInfo igd_combo_id_infos[] = { - /* HSW Classic */ - {0x0402, 0x8c4e, 0x04}, /* HSWGT1D, HSWD_w7 */ - {0x0406, 0x8c4e, 0x04}, /* HSWGT1M, HSWM_w7 */ - {0x0412, 0x8c4e, 0x04}, /* HSWGT2D, HSWD_w7 */ - {0x0416, 0x8c4e, 0x04}, /* HSWGT2M, HSWM_w7 */ - {0x041E, 0x8c4e, 0x04}, /* HSWGT15D, HSWD_w7 */ - /* HSW ULT */ - {0x0A06, 0x8c4e, 0x04}, /* HSWGT1UT, HSWM_w7 */ - {0x0A16, 0x8c4e, 0x04}, /* HSWGT2UT, HSWM_w7 */ - {0x0A26, 0x8c4e, 0x06}, /* HSWGT3UT, HSWM_w7 */ - {0x0A2E, 0x8c4e, 0x04}, /* HSWGT3UT28W, HSWM_w7 */ - {0x0A1E, 0x8c4e, 0x04}, /* HSWGT2UX, HSWM_w7 */ - {0x0A0E, 0x8c4e, 0x04}, /* HSWGT1ULX, HSWM_w7 */ - /* HSW CRW */ - {0x0D26, 0x8c4e, 0x04}, /* HSWGT3CW, HSWM_w7 */ - {0x0D22, 0x8c4e, 0x04}, /* HSWGT3CWDT, HSWD_w7 */ - /* HSW Server */ - {0x041A, 0x8c4e, 0x04}, /* HSWSVGT2, HSWD_w7 */ - /* HSW SRVR */ - {0x040A, 0x8c4e, 0x04}, /* HSWSVGT1, HSWD_w7 */ - /* BSW */ - {0x1606, 0x9cc3, 0x03}, /* BDWULTGT1, BDWM_w7 */ - {0x1616, 0x9cc3, 0x03}, /* BDWULTGT2, BDWM_w7 */ - {0x1626, 0x9cc3, 0x03}, /* BDWULTGT3, BDWM_w7 */ - {0x160E, 0x9cc3, 0x03}, /* BDWULXGT1, BDWM_w7 */ - {0x161E, 0x9cc3, 0x03}, /* BDWULXGT2, BDWM_w7 */ - {0x1602, 0x9cc3, 0x03}, /* BDWHALOGT1, BDWM_w7 */ - {0x1612, 0x9cc3, 0x03}, /* BDWHALOGT2, BDWM_w7 */ - {0x1622, 0x9cc3, 0x03}, /* BDWHALOGT3, BDWM_w7 */ - {0x162B, 0x9cc3, 0x03}, /* BDWHALO28W, BDWM_w7 */ - {0x162A, 0x9cc3, 0x03}, /* BDWGT3WRKS, BDWM_w7 */ - {0x162D, 0x9cc3, 0x03}, /* BDWGT3SRVR, BDWM_w7 */ -}; - -static void isa_bridge_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - dc->desc = "ISA bridge faked to support IGD PT"; - k->vendor_id = PCI_VENDOR_ID_INTEL; - k->class_id = PCI_CLASS_BRIDGE_ISA; -}; - -static TypeInfo isa_bridge_info = { - .name = "igd-passthrough-isa-bridge", - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIDevice), - .class_init = isa_bridge_class_init, -}; - -static void 
pt_graphics_register_types(void) -{ - type_register_static(&isa_bridge_info); -} -type_init(pt_graphics_register_types) - -void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id) -{ - struct PCIDevice *bridge_dev; - int i, num; - uint16_t pch_dev_id = 0xffff; - uint8_t pch_rev_id; - - num = ARRAY_SIZE(igd_combo_id_infos); - for (i = 0; i < num; i++) { - if (gpu_dev_id == igd_combo_id_infos[i].gpu_device_id) { - pch_dev_id = igd_combo_id_infos[i].pch_device_id; - pch_rev_id = igd_combo_id_infos[i].pch_revision_id; - } - } - - if (pch_dev_id == 0xffff) { - return; - } - - /* Currently IGD drivers always need to access PCH by 1f.0. */ - bridge_dev = pci_create_simple(bus, PCI_DEVFN(0x1f, 0), - "igd-passthrough-isa-bridge"); - - /* - * Note that vendor id is always PCI_VENDOR_ID_INTEL. - */ - if (!bridge_dev) { - fprintf(stderr, "set igd-passthrough-isa-bridge failed!\n"); - return; - } - pci_config_set_device_id(bridge_dev->config, pch_dev_id); - pci_config_set_revision(bridge_dev->config, pch_rev_id); -} - -static void isapc_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - m->desc = "ISA-only PC"; - m->max_cpus = 1; - m->option_rom_has_mr = true; - m->rom_file_has_mr = false; - pcmc->pci_enabled = false; - pcmc->has_acpi_build = false; - pcmc->smbios_defaults = false; - pcmc->gigabyte_align = false; - pcmc->smbios_legacy_mode = true; - pcmc->has_reserved_memory = false; -} - -DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa, - isapc_machine_options); - - -#ifdef CONFIG_XEN -static void xenfv_machine_options(MachineClass *m) -{ - m->desc = "Xen Fully-virtualized PC"; - m->max_cpus = HVM_MAX_VCPUS; - m->default_machine_opts = "accel=xen"; - m->hot_add_cpu = pc_hot_add_cpu; -} - -DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, - xenfv_machine_options); -#endif diff --git a/qemu/hw/i386/pc_q35.c b/qemu/hw/i386/pc_q35.c deleted file mode 100644 index 04aae8958..000000000 --- a/qemu/hw/i386/pc_q35.c +++ /dev/null @@ 
-1,318 +0,0 @@ -/* - * Q35 chipset based pc system emulator - * - * Copyright (c) 2003-2004 Fabrice Bellard - * Copyright (c) 2009, 2010 - * Isaku Yamahata <yamahata at valinux co jp> - * VA Linux Systems Japan K.K. - * Copyright (C) 2012 Jason Baron <jbaron@redhat.com> - * - * This is based on pc.c, but heavily modified. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/loader.h" -#include "sysemu/arch_init.h" -#include "hw/i2c/smbus.h" -#include "hw/boards.h" -#include "hw/timer/mc146818rtc.h" -#include "hw/xen/xen.h" -#include "sysemu/kvm.h" -#include "hw/kvm/clock.h" -#include "hw/pci-host/q35.h" -#include "exec/address-spaces.h" -#include "hw/i386/pc.h" -#include "hw/i386/ich9.h" -#include "hw/smbios/smbios.h" -#include "hw/ide/pci.h" -#include "hw/ide/ahci.h" -#include "hw/usb.h" -#include "qemu/error-report.h" -#include "migration/migration.h" - -/* ICH9 AHCI has 6 ports */ -#define MAX_SATA_PORTS 6 - -/* PC hardware initialisation */ -static void pc_q35_init(MachineState *machine) -{ - PCMachineState *pcms = PC_MACHINE(machine); - PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); - Q35PCIHost *q35_host; - PCIHostState *phb; - PCIBus *host_bus; - PCIDevice *lpc; - BusState *idebus[MAX_SATA_PORTS]; - ISADevice *rtc_state; - MemoryRegion *system_io = get_system_io(); - MemoryRegion *pci_memory; - MemoryRegion *rom_memory; - MemoryRegion *ram_memory; - GSIState *gsi_state; - ISABus *isa_bus; - qemu_irq *gsi; - qemu_irq *i8259; - int i; - ICH9LPCState *ich9_lpc; - PCIDevice *ahci; - ram_addr_t lowmem; - DriveInfo *hd[MAX_SATA_PORTS]; - MachineClass *mc = MACHINE_GET_CLASS(machine); - - /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory - * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping - * also known as MMCFG). - * If it doesn't, we need to split it in chunks below and above 4G. - * In any case, try to make sure that guest addresses aligned at - * 1G boundaries get mapped to host addresses aligned at 1G boundaries. - */ - if (machine->ram_size >= 0xb0000000) { - lowmem = 0x80000000; - } else { - lowmem = 0xb0000000; - } - - /* Handle the machine opt max-ram-below-4g. It is basically doing - * min(qemu limit, user limit). 
- */ - if (lowmem > pcms->max_ram_below_4g) { - lowmem = pcms->max_ram_below_4g; - if (machine->ram_size - lowmem > lowmem && - lowmem & ((1ULL << 30) - 1)) { - error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 - ") not a multiple of 1G; possible bad performance.", - pcms->max_ram_below_4g); - } - } - - if (machine->ram_size >= lowmem) { - pcms->above_4g_mem_size = machine->ram_size - lowmem; - pcms->below_4g_mem_size = lowmem; - } else { - pcms->above_4g_mem_size = 0; - pcms->below_4g_mem_size = machine->ram_size; - } - - if (xen_enabled()) { - xen_hvm_init(pcms, &ram_memory); - } - - pc_cpus_init(pcms); - - kvmclock_create(); - - /* pci enabled */ - if (pcmc->pci_enabled) { - pci_memory = g_new(MemoryRegion, 1); - memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); - rom_memory = pci_memory; - } else { - pci_memory = NULL; - rom_memory = get_system_memory(); - } - - pc_guest_info_init(pcms); - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ - smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", - mc->name, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } - - /* allocate ram and load rom/bios */ - if (!xen_enabled()) { - pc_memory_init(pcms, get_system_memory(), - rom_memory, &ram_memory); - } - - /* irq lines */ - gsi_state = g_malloc0(sizeof(*gsi_state)); - if (kvm_ioapic_in_kernel()) { - kvm_pc_setup_irq_routing(pcmc->pci_enabled); - gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, - GSI_NUM_PINS); - } else { - gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); - } - - /* create pci host bus */ - q35_host = Q35_HOST_DEVICE(qdev_create(NULL, TYPE_Q35_HOST_DEVICE)); - - object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host), NULL); - q35_host->mch.ram_memory = ram_memory; - q35_host->mch.pci_address_space = pci_memory; - q35_host->mch.system_memory = get_system_memory(); - q35_host->mch.address_space_io = system_io; - 
q35_host->mch.below_4g_mem_size = pcms->below_4g_mem_size; - q35_host->mch.above_4g_mem_size = pcms->above_4g_mem_size; - /* pci */ - qdev_init_nofail(DEVICE(q35_host)); - phb = PCI_HOST_BRIDGE(q35_host); - host_bus = phb->bus; - pcms->bus = phb->bus; - /* create ISA bus */ - lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV, - ICH9_LPC_FUNC), true, - TYPE_ICH9_LPC_DEVICE); - - object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, - TYPE_HOTPLUG_HANDLER, - (Object **)&pcms->acpi_dev, - object_property_allow_set_link, - OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); - object_property_set_link(OBJECT(machine), OBJECT(lpc), - PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); - - ich9_lpc = ICH9_LPC_DEVICE(lpc); - ich9_lpc->pic = gsi; - ich9_lpc->ioapic = gsi_state->ioapic_irq; - pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, - ICH9_LPC_NB_PIRQS); - pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq); - isa_bus = ich9_lpc->isa_bus; - - /*end early*/ - isa_bus_irqs(isa_bus, gsi); - - if (kvm_pic_in_kernel()) { - i8259 = kvm_i8259_init(isa_bus); - } else if (xen_enabled()) { - i8259 = xen_interrupt_controller_init(); - } else { - i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq()); - } - - for (i = 0; i < ISA_NUM_IRQS; i++) { - gsi_state->i8259_irq[i] = i8259[i]; - } - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "q35"); - } - - pc_register_ferr_irq(gsi[13]); - - assert(pcms->vmport != ON_OFF_AUTO__MAX); - if (pcms->vmport == ON_OFF_AUTO_AUTO) { - pcms->vmport = xen_enabled() ? 
ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; - } - - /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, - (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104); - - /* connect pm stuff to lpc */ - ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pcms)); - - /* ahci and SATA device, for q35 1 ahci controller is built-in */ - ahci = pci_create_simple_multifunction(host_bus, - PCI_DEVFN(ICH9_SATA1_DEV, - ICH9_SATA1_FUNC), - true, "ich9-ahci"); - idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); - idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); - g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); - ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); - ahci_ide_create_devs(ahci, hd); - - if (usb_enabled()) { - /* Should we create 6 UHCI according to ich9 spec? */ - ehci_create_ich9_with_companions(host_bus, 0x1d); - } - - /* TODO: Populate SPD eeprom data. */ - smbus_eeprom_init(ich9_smb_init(host_bus, - PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), - 0xb100), - 8, NULL, 0); - - pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); - - /* the rest devices to which pci devfn is automatically assigned */ - pc_vga_init(isa_bus, host_bus); - pc_nic_init(isa_bus, host_bus); - if (pcmc->pci_enabled) { - pc_pci_device_init(host_bus); - } - - if (pcms->acpi_nvdimm_state.is_enabled) { - nvdimm_init_acpi_state(&pcms->acpi_nvdimm_state, system_io, - pcms->fw_cfg, OBJECT(pcms)); - } -} - -#define DEFINE_Q35_MACHINE(suffix, name, compatfn, optionfn) \ - static void pc_init_##suffix(MachineState *machine) \ - { \ - void (*compat)(MachineState *m) = (compatfn); \ - if (compat) { \ - compat(machine); \ - } \ - pc_q35_init(machine); \ - } \ - DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) - - -static void pc_q35_machine_options(MachineClass *m) -{ - m->family = "pc_q35"; - m->desc = "Standard PC (Q35 + ICH9, 2009)"; - m->hot_add_cpu = pc_hot_add_cpu; - m->units_per_default_bus = 1; - m->default_machine_opts = "firmware=bios-256k.bin"; - 
m->default_display = "std"; - m->no_floppy = 1; -} - -static void pc_q35_2_6_machine_options(MachineClass *m) -{ - pc_q35_machine_options(m); - m->alias = "q35"; -} - -DEFINE_Q35_MACHINE(v2_6, "pc-q35-2.6", NULL, - pc_q35_2_6_machine_options); - -static void pc_q35_2_5_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_2_6_machine_options(m); - m->alias = NULL; - pcmc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_5); -} - -DEFINE_Q35_MACHINE(v2_5, "pc-q35-2.5", NULL, - pc_q35_2_5_machine_options); - -static void pc_q35_2_4_machine_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_2_5_machine_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); -} - -DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, - pc_q35_2_4_machine_options); diff --git a/qemu/hw/i386/pc_sysfw.c b/qemu/hw/i386/pc_sysfw.c deleted file mode 100644 index f915ad0a3..000000000 --- a/qemu/hw/i386/pc_sysfw.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * QEMU PC System Firmware - * - * Copyright (c) 2003-2004 Fabrice Bellard - * Copyright (c) 2011-2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "sysemu/block-backend.h" -#include "qemu/error-report.h" -#include "hw/sysbus.h" -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "sysemu/sysemu.h" -#include "hw/block/flash.h" -#include "sysemu/kvm.h" - -#define BIOS_FILENAME "bios.bin" - -typedef struct PcSysFwDevice { - SysBusDevice busdev; - uint8_t isapc_ram_fw; -} PcSysFwDevice; - -static void pc_isa_bios_init(MemoryRegion *rom_memory, - MemoryRegion *flash_mem, - int ram_size) -{ - int isa_bios_size; - MemoryRegion *isa_bios; - uint64_t flash_size; - void *flash_ptr, *isa_bios_ptr; - - flash_size = memory_region_size(flash_mem); - - /* map the last 128KB of the BIOS in ISA space */ - isa_bios_size = MIN(flash_size, 128 * 1024); - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, - &error_fatal); - vmstate_register_ram_global(isa_bios); - memory_region_add_subregion_overlap(rom_memory, - 0x100000 - isa_bios_size, - isa_bios, - 1); - - /* copy ISA rom image from top of flash memory */ - flash_ptr = memory_region_get_ram_ptr(flash_mem); - isa_bios_ptr = memory_region_get_ram_ptr(isa_bios); - memcpy(isa_bios_ptr, - ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), - isa_bios_size); - - memory_region_set_readonly(isa_bios, true); -} - -#define FLASH_MAP_UNIT_MAX 2 - -/* We don't have a theoretically justifiable exact lower bound on the base - * 
address of any flash mapping. In practice, the IO-APIC MMIO range is - * [0xFEE00000..0xFEE01000[ -- see IO_APIC_DEFAULT_ADDRESS --, leaving free - * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in - * size. - */ -#define FLASH_MAP_BASE_MIN ((hwaddr)(0x100000000ULL - 8*1024*1024)) - -/* This function maps flash drives from 4G downward, in order of their unit - * numbers. The mapping starts at unit#0, with unit number increments of 1, and - * stops before the first missing flash drive, or before - * unit#FLASH_MAP_UNIT_MAX, whichever is reached first. - * - * Addressing within one flash drive is of course not reversed. - * - * An error message is printed and the process exits if: - * - the size of the backing file for a flash drive is non-positive, or not a - * multiple of the required sector size, or - * - the current mapping's base address would fall below FLASH_MAP_BASE_MIN. - * - * The drive with unit#0 (if available) is mapped at the highest address, and - * it is passed to pc_isa_bios_init(). Merging several drives for isa-bios is - * not supported. 
- */ -static void pc_system_flash_init(MemoryRegion *rom_memory) -{ - int unit; - DriveInfo *pflash_drv; - BlockBackend *blk; - int64_t size; - char *fatal_errmsg = NULL; - hwaddr phys_addr = 0x100000000ULL; - int sector_bits, sector_size; - pflash_t *system_flash; - MemoryRegion *flash_mem; - char name[64]; - - sector_bits = 12; - sector_size = 1 << sector_bits; - - for (unit = 0; - (unit < FLASH_MAP_UNIT_MAX && - (pflash_drv = drive_get(IF_PFLASH, 0, unit)) != NULL); - ++unit) { - blk = blk_by_legacy_dinfo(pflash_drv); - size = blk_getlength(blk); - if (size < 0) { - fatal_errmsg = g_strdup_printf("failed to get backing file size"); - } else if (size == 0) { - fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " - "cannot have zero size"); - } else if ((size % sector_size) != 0) { - fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " - "must be a multiple of 0x%x", sector_size); - } else if (phys_addr < size || phys_addr - size < FLASH_MAP_BASE_MIN) { - fatal_errmsg = g_strdup_printf("oversized backing file, pflash " - "segments cannot be mapped under " - TARGET_FMT_plx, FLASH_MAP_BASE_MIN); - } - if (fatal_errmsg != NULL) { - Location loc; - - /* push a new, "none" location on the location stack; overwrite its - * contents with the location saved in the option; print the error - * (includes location); pop the top - */ - loc_push_none(&loc); - if (pflash_drv->opts != NULL) { - qemu_opts_loc_restore(pflash_drv->opts); - } - error_report("%s", fatal_errmsg); - loc_pop(&loc); - g_free(fatal_errmsg); - exit(1); - } - - phys_addr -= size; - - /* pflash_cfi01_register() creates a deep copy of the name */ - snprintf(name, sizeof name, "system.flash%d", unit); - system_flash = pflash_cfi01_register(phys_addr, NULL /* qdev */, name, - size, blk, sector_size, - size >> sector_bits, - 1 /* width */, - 0x0000 /* id0 */, - 0x0000 /* id1 */, - 0x0000 /* id2 */, - 0x0000 /* id3 */, - 0 /* be */); - if (unit == 0) { - flash_mem = 
pflash_cfi01_get_memory(system_flash); - pc_isa_bios_init(rom_memory, flash_mem, size); - } - } -} - -static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) -{ - char *filename; - MemoryRegion *bios, *isa_bios; - int bios_size, isa_bios_size; - int ret; - - /* BIOS load */ - if (bios_name == NULL) { - bios_name = BIOS_FILENAME; - } - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - if (filename) { - bios_size = get_image_size(filename); - } else { - bios_size = -1; - } - if (bios_size <= 0 || - (bios_size % 65536) != 0) { - goto bios_error; - } - bios = g_malloc(sizeof(*bios)); - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); - vmstate_register_ram_global(bios); - if (!isapc_ram_fw) { - memory_region_set_readonly(bios, true); - } - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); - if (ret != 0) { - bios_error: - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); - exit(1); - } - g_free(filename); - - /* map the last 128KB of the BIOS in ISA space */ - isa_bios_size = bios_size; - if (isa_bios_size > (128 * 1024)) { - isa_bios_size = 128 * 1024; - } - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, - bios_size - isa_bios_size, isa_bios_size); - memory_region_add_subregion_overlap(rom_memory, - 0x100000 - isa_bios_size, - isa_bios, - 1); - if (!isapc_ram_fw) { - memory_region_set_readonly(isa_bios, true); - } - - /* map all the bios at the top of memory */ - memory_region_add_subregion(rom_memory, - (uint32_t)(-bios_size), - bios); -} - -void pc_system_firmware_init(MemoryRegion *rom_memory, bool isapc_ram_fw) -{ - DriveInfo *pflash_drv; - - pflash_drv = drive_get(IF_PFLASH, 0, 0); - - if (isapc_ram_fw || pflash_drv == NULL) { - /* When a pflash drive is not found, use rom-mode */ - old_pc_system_rom_init(rom_memory, isapc_ram_fw); - return; - } - - if (kvm_enabled() && !kvm_readonly_mem_enabled()) { - /* Older KVM cannot 
execute from device memory. So, flash memory - * cannot be used unless the readonly memory kvm capability is present. */ - fprintf(stderr, "qemu: pflash with kvm requires KVM readonly memory support\n"); - exit(1); - } - - pc_system_flash_init(rom_memory); -} diff --git a/qemu/hw/i386/pci-assign-load-rom.c b/qemu/hw/i386/pci-assign-load-rom.c deleted file mode 100644 index 4bbb08c95..000000000 --- a/qemu/hw/i386/pci-assign-load-rom.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * This is splited from hw/i386/kvm/pci-assign.c - */ -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "qemu/error-report.h" -#include "ui/console.h" -#include "hw/loader.h" -#include "monitor/monitor.h" -#include "qemu/range.h" -#include "sysemu/sysemu.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci-assign.h" - -/* - * Scan the assigned devices for the devices that have an option ROM, and then - * load the corresponding ROM data to RAM. If an error occurs while loading an - * option ROM, we just ignore that option ROM and continue with the next one. 
- */ -void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, - int *size, unsigned int domain, - unsigned int bus, unsigned int slot, - unsigned int function) -{ - char name[32], rom_file[64]; - FILE *fp; - uint8_t val; - struct stat st; - void *ptr = NULL; - - /* If loading ROM from file, pci handles it */ - if (dev->romfile || !dev->rom_bar) { - return NULL; - } - - snprintf(rom_file, sizeof(rom_file), - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", - domain, bus, slot, function); - - if (stat(rom_file, &st)) { - return NULL; - } - - /* Write "1" to the ROM file to enable it */ - fp = fopen(rom_file, "r+"); - if (fp == NULL) { - error_report("pci-assign: Cannot open %s: %s", rom_file, strerror(errno)); - return NULL; - } - val = 1; - if (fwrite(&val, 1, 1, fp) != 1) { - goto close_rom; - } - fseek(fp, 0, SEEK_SET); - - snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); - memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort); - vmstate_register_ram(&dev->rom, &dev->qdev); - ptr = memory_region_get_ram_ptr(&dev->rom); - memset(ptr, 0xff, st.st_size); - - if (!fread(ptr, 1, st.st_size, fp)) { - error_report("pci-assign: Cannot read from host %s", rom_file); - error_printf("Device option ROM contents are probably invalid " - "(check dmesg).\nSkip option ROM probe with rombar=0, " - "or load from file with romfile=\n"); - goto close_rom; - } - - pci_register_bar(dev, PCI_ROM_SLOT, 0, &dev->rom); - dev->has_rom = true; - *size = st.st_size; -close_rom: - /* Write "0" to disable ROM */ - fseek(fp, 0, SEEK_SET); - val = 0; - if (!fwrite(&val, 1, 1, fp)) { - DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); - } - fclose(fp); - - return ptr; -} diff --git a/qemu/hw/i386/xen/Makefile.objs b/qemu/hw/i386/xen/Makefile.objs deleted file mode 100644 index 801a68d32..000000000 --- a/qemu/hw/i386/xen/Makefile.objs +++ /dev/null @@ -1 +0,0 @@ -obj-y += xen_platform.o xen_apic.o xen_pvdevice.o diff --git 
a/qemu/hw/i386/xen/xen_apic.c b/qemu/hw/i386/xen/xen_apic.c deleted file mode 100644 index 21d68ee04..000000000 --- a/qemu/hw/i386/xen/xen_apic.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Xen basic APIC support - * - * Copyright (c) 2012 Citrix - * - * Authors: - * Wei Liu <wei.liu2@citrix.com> - * - * This work is licensed under the terms of the GNU GPL version 2 or - * later. See the COPYING file in the top-level directory. - */ -#include "qemu/osdep.h" -#include "hw/i386/apic_internal.h" -#include "hw/pci/msi.h" -#include "hw/xen/xen.h" - -static uint64_t xen_apic_mem_read(void *opaque, hwaddr addr, - unsigned size) -{ - return ~(uint64_t)0; -} - -static void xen_apic_mem_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - if (size != sizeof(uint32_t)) { - fprintf(stderr, "Xen: APIC write data size = %d, invalid\n", size); - return; - } - - xen_hvm_inject_msi(addr, data); -} - -static const MemoryRegionOps xen_apic_io_ops = { - .read = xen_apic_mem_read, - .write = xen_apic_mem_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void xen_apic_realize(DeviceState *dev, Error **errp) -{ - APICCommonState *s = APIC_COMMON(dev); - - s->vapic_control = 0; - memory_region_init_io(&s->io_memory, OBJECT(s), &xen_apic_io_ops, s, - "xen-apic-msi", APIC_SPACE_SIZE); - msi_nonbroken = true; -} - -static void xen_apic_set_base(APICCommonState *s, uint64_t val) -{ -} - -static void xen_apic_set_tpr(APICCommonState *s, uint8_t val) -{ -} - -static uint8_t xen_apic_get_tpr(APICCommonState *s) -{ - return 0; -} - -static void xen_apic_vapic_base_update(APICCommonState *s) -{ -} - -static void xen_apic_external_nmi(APICCommonState *s) -{ -} - -static void xen_apic_class_init(ObjectClass *klass, void *data) -{ - APICCommonClass *k = APIC_COMMON_CLASS(klass); - - k->realize = xen_apic_realize; - k->set_base = xen_apic_set_base; - k->set_tpr = xen_apic_set_tpr; - k->get_tpr = xen_apic_get_tpr; - k->vapic_base_update = xen_apic_vapic_base_update; - 
k->external_nmi = xen_apic_external_nmi; -} - -static const TypeInfo xen_apic_info = { - .name = "xen-apic", - .parent = TYPE_APIC_COMMON, - .instance_size = sizeof(APICCommonState), - .class_init = xen_apic_class_init, -}; - -static void xen_apic_register_types(void) -{ - type_register_static(&xen_apic_info); -} - -type_init(xen_apic_register_types) diff --git a/qemu/hw/i386/xen/xen_platform.c b/qemu/hw/i386/xen/xen_platform.c deleted file mode 100644 index aa7839324..000000000 --- a/qemu/hw/i386/xen/xen_platform.c +++ /dev/null @@ -1,455 +0,0 @@ -/* - * XEN platform pci device, formerly known as the event channel device - * - * Copyright (c) 2003-2004 Intel Corp. - * Copyright (c) 2006 XenSource - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "hw/ide.h" -#include "hw/pci/pci.h" -#include "hw/irq.h" -#include "hw/xen/xen_common.h" -#include "hw/xen/xen_backend.h" -#include "trace.h" -#include "exec/address-spaces.h" -#include "sysemu/block-backend.h" -#include "qemu/error-report.h" - -#include <xenguest.h> - -//#define DEBUG_PLATFORM - -#ifdef DEBUG_PLATFORM -#define DPRINTF(fmt, ...) do { \ - fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \ -} while (0) -#else -#define DPRINTF(fmt, ...) do { } while (0) -#endif - -#define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ - -typedef struct PCIXenPlatformState { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - - MemoryRegion fixed_io; - MemoryRegion bar; - MemoryRegion mmio_bar; - uint8_t flags; /* used only for version_id == 2 */ - int drivers_blacklisted; - uint16_t driver_product_version; - - /* Log from guest drivers */ - char log_buffer[4096]; - int log_buffer_off; -} PCIXenPlatformState; - -#define TYPE_XEN_PLATFORM "xen-platform" -#define XEN_PLATFORM(obj) \ - OBJECT_CHECK(PCIXenPlatformState, (obj), TYPE_XEN_PLATFORM) - -#define XEN_PLATFORM_IOPORT 0x10 - -/* Send bytes to syslog */ -static void log_writeb(PCIXenPlatformState *s, char val) -{ - if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) { - /* Flush buffer */ - s->log_buffer[s->log_buffer_off] = 0; - trace_xen_platform_log(s->log_buffer); - s->log_buffer_off = 0; - } else { - s->log_buffer[s->log_buffer_off++] = val; - } -} - -/* Xen Platform, Fixed IOPort */ -#define UNPLUG_ALL_IDE_DISKS 1 -#define UNPLUG_ALL_NICS 2 -#define UNPLUG_AUX_IDE_DISKS 4 - -static void unplug_nic(PCIBus *b, PCIDevice *d, void *o) -{ - /* We have to ignore passthrough devices */ - if (pci_get_word(d->config + PCI_CLASS_DEVICE) == - PCI_CLASS_NETWORK_ETHERNET - && strcmp(d->name, "xen-pci-passthrough") != 0) { - object_unparent(OBJECT(d)); - } -} - 
-static void pci_unplug_nics(PCIBus *bus) -{ - pci_for_each_device(bus, 0, unplug_nic, NULL); -} - -static void unplug_disks(PCIBus *b, PCIDevice *d, void *o) -{ - /* We have to ignore passthrough devices */ - if (pci_get_word(d->config + PCI_CLASS_DEVICE) == - PCI_CLASS_STORAGE_IDE - && strcmp(d->name, "xen-pci-passthrough") != 0) { - pci_piix3_xen_ide_unplug(DEVICE(d)); - } -} - -static void pci_unplug_disks(PCIBus *bus) -{ - pci_for_each_device(bus, 0, unplug_disks, NULL); -} - -static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val) -{ - PCIXenPlatformState *s = opaque; - - switch (addr) { - case 0: { - PCIDevice *pci_dev = PCI_DEVICE(s); - /* Unplug devices. Value is a bitmask of which devices to - unplug, with bit 0 the IDE devices, bit 1 the network - devices, and bit 2 the non-primary-master IDE devices. */ - if (val & UNPLUG_ALL_IDE_DISKS) { - DPRINTF("unplug disks\n"); - blk_drain_all(); - blk_flush_all(); - pci_unplug_disks(pci_dev->bus); - } - if (val & UNPLUG_ALL_NICS) { - DPRINTF("unplug nics\n"); - pci_unplug_nics(pci_dev->bus); - } - if (val & UNPLUG_AUX_IDE_DISKS) { - DPRINTF("unplug auxiliary disks not supported\n"); - } - break; - } - case 2: - switch (val) { - case 1: - DPRINTF("Citrix Windows PV drivers loaded in guest\n"); - break; - case 0: - DPRINTF("Guest claimed to be running PV product 0?\n"); - break; - default: - DPRINTF("Unknown PV product %d loaded in guest\n", val); - break; - } - s->driver_product_version = val; - break; - } -} - -static void platform_fixed_ioport_writel(void *opaque, uint32_t addr, - uint32_t val) -{ - switch (addr) { - case 0: - /* PV driver version */ - break; - } -} - -static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) -{ - PCIXenPlatformState *s = opaque; - - switch (addr) { - case 0: /* Platform flags */ { - hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ? 
- HVMMEM_ram_ro : HVMMEM_ram_rw; - if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type, 0xc0, 0x40)) { - DPRINTF("unable to change ro/rw state of ROM memory area!\n"); - } else { - s->flags = val & PFFLAG_ROM_LOCK; - DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n", - (mem_type == HVMMEM_ram_ro ? "ro":"rw")); - } - break; - } - case 2: - log_writeb(s, val); - break; - } -} - -static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr) -{ - PCIXenPlatformState *s = opaque; - - switch (addr) { - case 0: - if (s->drivers_blacklisted) { - /* The drivers will recognise this magic number and refuse - * to do anything. */ - return 0xd249; - } else { - /* Magic value so that you can identify the interface. */ - return 0x49d2; - } - default: - return 0xffff; - } -} - -static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr) -{ - PCIXenPlatformState *s = opaque; - - switch (addr) { - case 0: - /* Platform flags */ - return s->flags; - case 2: - /* Version number */ - return 1; - default: - return 0xff; - } -} - -static void platform_fixed_ioport_reset(void *opaque) -{ - PCIXenPlatformState *s = opaque; - - platform_fixed_ioport_writeb(s, 0, 0); -} - -static uint64_t platform_fixed_ioport_read(void *opaque, - hwaddr addr, - unsigned size) -{ - switch (size) { - case 1: - return platform_fixed_ioport_readb(opaque, addr); - case 2: - return platform_fixed_ioport_readw(opaque, addr); - default: - return -1; - } -} - -static void platform_fixed_ioport_write(void *opaque, hwaddr addr, - - uint64_t val, unsigned size) -{ - switch (size) { - case 1: - platform_fixed_ioport_writeb(opaque, addr, val); - break; - case 2: - platform_fixed_ioport_writew(opaque, addr, val); - break; - case 4: - platform_fixed_ioport_writel(opaque, addr, val); - break; - } -} - - -static const MemoryRegionOps platform_fixed_io_ops = { - .read = platform_fixed_ioport_read, - .write = platform_fixed_ioport_write, - .valid = { - .unaligned = true, - }, - .impl 
= { - .min_access_size = 1, - .max_access_size = 4, - .unaligned = true, - }, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void platform_fixed_ioport_init(PCIXenPlatformState* s) -{ - memory_region_init_io(&s->fixed_io, OBJECT(s), &platform_fixed_io_ops, s, - "xen-fixed", 16); - memory_region_add_subregion(get_system_io(), XEN_PLATFORM_IOPORT, - &s->fixed_io); -} - -/* Xen Platform PCI Device */ - -static uint64_t xen_platform_ioport_readb(void *opaque, hwaddr addr, - unsigned int size) -{ - if (addr == 0) { - return platform_fixed_ioport_readb(opaque, 0); - } else { - return ~0u; - } -} - -static void xen_platform_ioport_writeb(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - PCIXenPlatformState *s = opaque; - - switch (addr) { - case 0: /* Platform flags */ - platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val); - break; - case 8: - log_writeb(s, (uint32_t)val); - break; - default: - break; - } -} - -static const MemoryRegionOps xen_pci_io_ops = { - .read = xen_platform_ioport_readb, - .write = xen_platform_ioport_writeb, - .impl.min_access_size = 1, - .impl.max_access_size = 1, -}; - -static void platform_ioport_bar_setup(PCIXenPlatformState *d) -{ - memory_region_init_io(&d->bar, OBJECT(d), &xen_pci_io_ops, d, - "xen-pci", 0x100); -} - -static uint64_t platform_mmio_read(void *opaque, hwaddr addr, - unsigned size) -{ - DPRINTF("Warning: attempted read from physical address " - "0x" TARGET_FMT_plx " in xen platform mmio space\n", addr); - - return 0; -} - -static void platform_mmio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - DPRINTF("Warning: attempted write of 0x%"PRIx64" to physical " - "address 0x" TARGET_FMT_plx " in xen platform mmio space\n", - val, addr); -} - -static const MemoryRegionOps platform_mmio_handler = { - .read = &platform_mmio_read, - .write = &platform_mmio_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void platform_mmio_setup(PCIXenPlatformState *d) -{ - 
memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d, - "xen-mmio", 0x1000000); -} - -static int xen_platform_post_load(void *opaque, int version_id) -{ - PCIXenPlatformState *s = opaque; - - platform_fixed_ioport_writeb(s, 0, s->flags); - - return 0; -} - -static const VMStateDescription vmstate_xen_platform = { - .name = "platform", - .version_id = 4, - .minimum_version_id = 4, - .post_load = xen_platform_post_load, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(parent_obj, PCIXenPlatformState), - VMSTATE_UINT8(flags, PCIXenPlatformState), - VMSTATE_END_OF_LIST() - } -}; - -static void xen_platform_realize(PCIDevice *dev, Error **errp) -{ - PCIXenPlatformState *d = XEN_PLATFORM(dev); - uint8_t *pci_conf; - - /* Device will crash on reset if xen is not initialized */ - if (!xen_enabled()) { - error_setg(errp, "xen-platform device requires the Xen accelerator"); - return; - } - - pci_conf = dev->config; - - pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); - - pci_config_set_prog_interface(pci_conf, 0); - - pci_conf[PCI_INTERRUPT_PIN] = 1; - - platform_ioport_bar_setup(d); - pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &d->bar); - - /* reserve 16MB mmio address for share memory*/ - platform_mmio_setup(d); - pci_register_bar(dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, - &d->mmio_bar); - - platform_fixed_ioport_init(d); -} - -static void platform_reset(DeviceState *dev) -{ - PCIXenPlatformState *s = XEN_PLATFORM(dev); - - platform_fixed_ioport_reset(s); -} - -static void xen_platform_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - k->realize = xen_platform_realize; - k->vendor_id = PCI_VENDOR_ID_XEN; - k->device_id = PCI_DEVICE_ID_XEN_PLATFORM; - k->class_id = PCI_CLASS_OTHERS << 8 | 0x80; - k->subsystem_vendor_id = PCI_VENDOR_ID_XEN; - k->subsystem_id = PCI_DEVICE_ID_XEN_PLATFORM; - k->revision = 1; - 
set_bit(DEVICE_CATEGORY_MISC, dc->categories); - dc->desc = "XEN platform pci device"; - dc->reset = platform_reset; - dc->vmsd = &vmstate_xen_platform; -} - -static const TypeInfo xen_platform_info = { - .name = TYPE_XEN_PLATFORM, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIXenPlatformState), - .class_init = xen_platform_class_init, -}; - -static void xen_platform_register_types(void) -{ - type_register_static(&xen_platform_info); -} - -type_init(xen_platform_register_types) diff --git a/qemu/hw/i386/xen/xen_pvdevice.c b/qemu/hw/i386/xen/xen_pvdevice.c deleted file mode 100644 index c093b3445..000000000 --- a/qemu/hw/i386/xen/xen_pvdevice.c +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) Citrix Systems Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, - * with or without modification, are permitted provided - * that the following conditions are met: - * - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the - * following disclaimer in the documentation and/or other - * materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/hw.h" -#include "hw/pci/pci.h" -#include "trace.h" - -#define TYPE_XEN_PV_DEVICE "xen-pvdevice" - -#define XEN_PV_DEVICE(obj) \ - OBJECT_CHECK(XenPVDevice, (obj), TYPE_XEN_PV_DEVICE) - -typedef struct XenPVDevice { - /*< private >*/ - PCIDevice parent_obj; - /*< public >*/ - uint16_t vendor_id; - uint16_t device_id; - uint8_t revision; - uint32_t size; - MemoryRegion mmio; -} XenPVDevice; - -static uint64_t xen_pv_mmio_read(void *opaque, hwaddr addr, - unsigned size) -{ - trace_xen_pv_mmio_read(addr); - - return ~(uint64_t)0; -} - -static void xen_pv_mmio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ - trace_xen_pv_mmio_write(addr); -} - -static const MemoryRegionOps xen_pv_mmio_ops = { - .read = &xen_pv_mmio_read, - .write = &xen_pv_mmio_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void xen_pv_realize(PCIDevice *pci_dev, Error **errp) -{ - XenPVDevice *d = XEN_PV_DEVICE(pci_dev); - uint8_t *pci_conf; - - /* device-id property must always be supplied */ - if (d->device_id == 0xffff) { - error_setg(errp, "Device ID invalid, it must always be supplied"); - return; - } - - pci_conf = pci_dev->config; - - pci_set_word(pci_conf + PCI_VENDOR_ID, d->vendor_id); - pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, d->vendor_id); - pci_set_word(pci_conf + PCI_DEVICE_ID, d->device_id); - pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 
d->device_id); - pci_set_byte(pci_conf + PCI_REVISION_ID, d->revision); - - pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_MEMORY); - - pci_config_set_prog_interface(pci_conf, 0); - - pci_conf[PCI_INTERRUPT_PIN] = 1; - - memory_region_init_io(&d->mmio, NULL, &xen_pv_mmio_ops, d, - "mmio", d->size); - - pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, - &d->mmio); -} - -static Property xen_pv_props[] = { - DEFINE_PROP_UINT16("vendor-id", XenPVDevice, vendor_id, PCI_VENDOR_ID_XEN), - DEFINE_PROP_UINT16("device-id", XenPVDevice, device_id, 0xffff), - DEFINE_PROP_UINT8("revision", XenPVDevice, revision, 0x01), - DEFINE_PROP_UINT32("size", XenPVDevice, size, 0x400000), - DEFINE_PROP_END_OF_LIST() -}; - -static void xen_pv_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - k->realize = xen_pv_realize; - k->class_id = PCI_CLASS_SYSTEM_OTHER; - dc->desc = "Xen PV Device"; - dc->props = xen_pv_props; -} - -static const TypeInfo xen_pv_type_info = { - .name = TYPE_XEN_PV_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(XenPVDevice), - .class_init = xen_pv_class_init, -}; - -static void xen_pv_register_types(void) -{ - type_register_static(&xen_pv_type_info); -} - -type_init(xen_pv_register_types) |