diff options
Diffstat (limited to 'qemu/hw/i386')
33 files changed, 25858 insertions, 0 deletions
diff --git a/qemu/hw/i386/Makefile.objs b/qemu/hw/i386/Makefile.objs new file mode 100644 index 000000000..bd4f147f9 --- /dev/null +++ b/qemu/hw/i386/Makefile.objs @@ -0,0 +1,33 @@ +obj-$(CONFIG_KVM) += kvm/ +obj-y += multiboot.o smbios.o +obj-y += pc.o pc_piix.o pc_q35.o +obj-y += pc_sysfw.o +obj-y += intel_iommu.o +obj-$(CONFIG_XEN) += ../xenpv/ xen/ + +obj-y += kvmvapic.o +obj-y += acpi-build.o +hw/i386/acpi-build.o: hw/i386/acpi-build.c \ + hw/i386/acpi-dsdt.hex hw/i386/q35-acpi-dsdt.hex + +iasl-option=$(shell if test -z "`$(1) $(2) 2>&1 > /dev/null`" \ + ; then echo "$(2)"; else echo "$(3)"; fi ;) + +ifdef IASL +#IASL Present. Generate hex files from .dsl +hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.dsl $(SRC_PATH)/scripts/acpi_extract_preprocess.py $(SRC_PATH)/scripts/acpi_extract.py + $(call quiet-command, $(CPP) -x c -P $(QEMU_DGFLAGS) $(QEMU_INCLUDES) $< -o $*.dsl.i.orig, " CPP $(TARGET_DIR)$*.dsl.i.orig") + $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract_preprocess.py $*.dsl.i.orig > $*.dsl.i, " ACPI_PREPROCESS $(TARGET_DIR)$*.dsl.i") + $(call quiet-command, $(IASL) $(call iasl-option,$(IASL),-Pn,) -vs -l -tc -p $* $*.dsl.i $(if $(V), , > /dev/null) 2>&1 ," IASL $(TARGET_DIR)$*.dsl.i") + $(call quiet-command, $(PYTHON) $(SRC_PATH)/scripts/acpi_extract.py $*.lst > $*.off, " ACPI_EXTRACT $(TARGET_DIR)$*.off") + $(call quiet-command, cat $*.off > $@, " CAT $(TARGET_DIR)$@") +else +#IASL Not present. Restore pre-generated hex files. +hw/i386/%.hex: $(SRC_PATH)/hw/i386/%.hex.generated + $(call quiet-command, cp -f $< $@, " CP $(TARGET_DIR)$@") +endif + +.PHONY: cleanhex +cleanhex: + rm -f hw/i386/*hex +clean: cleanhex diff --git a/qemu/hw/i386/acpi-build.c b/qemu/hw/i386/acpi-build.c new file mode 100644 index 000000000..46eddb8e4 --- /dev/null +++ b/qemu/hw/i386/acpi-build.c @@ -0,0 +1,1939 @@ +/* Support for generating ACPI tables and passing them to Guests + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "acpi-build.h" +#include <stddef.h> +#include <glib.h> +#include "qemu-common.h" +#include "qemu/bitmap.h" +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "hw/pci/pci.h" +#include "qom/cpu.h" +#include "hw/i386/pc.h" +#include "target-i386/cpu.h" +#include "hw/timer/hpet.h" +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/acpi.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/loader.h" +#include "hw/isa/isa.h" +#include "hw/acpi/memory_hotplug.h" +#include "sysemu/tpm.h" +#include "hw/acpi/tpm.h" +#include "sysemu/tpm_backend.h" + +/* Supported chipsets: */ +#include "hw/acpi/piix4.h" +#include "hw/acpi/pcihp.h" +#include "hw/i386/ich9.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci-host/q35.h" +#include "hw/i386/intel_iommu.h" + +#include "hw/i386/q35-acpi-dsdt.hex" +#include "hw/i386/acpi-dsdt.hex" + +#include "hw/acpi/aml-build.h" + +#include "qapi/qmp/qint.h" +#include "qom/qom-qobject.h" + +/* These are used to size the ACPI tables for -M pc-i440fx-1.7 and + * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows + * a little bit, there should be plenty of free space since the DSDT + * shrunk by ~1.5k between QEMU 2.0 and QEMU 2.1. + */ +#define ACPI_BUILD_LEGACY_CPU_AML_SIZE 97 +#define ACPI_BUILD_ALIGN_SIZE 0x1000 + +#define ACPI_BUILD_TABLE_SIZE 0x20000 + +/* #define DEBUG_ACPI_BUILD */ +#ifdef DEBUG_ACPI_BUILD +#define ACPI_BUILD_DPRINTF(fmt, ...) \ + do {printf("ACPI_BUILD: " fmt, ## __VA_ARGS__); } while (0) +#else +#define ACPI_BUILD_DPRINTF(fmt, ...) +#endif + +typedef struct AcpiCpuInfo { + DECLARE_BITMAP(found_cpus, ACPI_CPU_HOTPLUG_ID_LIMIT); +} AcpiCpuInfo; + +typedef struct AcpiMcfgInfo { + uint64_t mcfg_base; + uint32_t mcfg_size; +} AcpiMcfgInfo; + +typedef struct AcpiPmInfo { + bool s3_disabled; + bool s4_disabled; + bool pcihp_bridge_en; + uint8_t s4_val; + uint16_t sci_int; + uint8_t acpi_enable_cmd; + uint8_t acpi_disable_cmd; + uint32_t gpe0_blk; + uint32_t gpe0_blk_len; + uint32_t io_base; + uint16_t cpu_hp_io_base; + uint16_t cpu_hp_io_len; + uint16_t mem_hp_io_base; + uint16_t mem_hp_io_len; + uint16_t pcihp_io_base; + uint16_t pcihp_io_len; +} AcpiPmInfo; + +typedef struct AcpiMiscInfo { + bool has_hpet; + TPMVersion tpm_version; + const unsigned char *dsdt_code; + unsigned dsdt_size; + uint16_t pvpanic_port; + uint16_t applesmc_io_base; +} AcpiMiscInfo; + +typedef struct AcpiBuildPciBusHotplugState { + GArray *device_table; + GArray *notify_table; + struct AcpiBuildPciBusHotplugState *parent; + bool pcihp_bridge_en; +} AcpiBuildPciBusHotplugState; + +static void acpi_get_dsdt(AcpiMiscInfo *info) +{ + Object *piix = piix4_pm_find(); + Object *lpc = ich9_lpc_find(); + assert(!!piix != !!lpc); + + if (piix) { + info->dsdt_code = AcpiDsdtAmlCode; + info->dsdt_size = sizeof AcpiDsdtAmlCode; + } + if (lpc) { + info->dsdt_code = Q35AcpiDsdtAmlCode; + info->dsdt_size = sizeof Q35AcpiDsdtAmlCode; + } +} + +static +int acpi_add_cpu_info(Object *o, void *opaque) +{ + AcpiCpuInfo *cpu = opaque; + uint64_t apic_id; + + if (object_dynamic_cast(o, TYPE_CPU)) { + apic_id = object_property_get_int(o, "apic-id", NULL); + assert(apic_id < ACPI_CPU_HOTPLUG_ID_LIMIT); + + set_bit(apic_id, cpu->found_cpus); + } + + object_child_foreach(o, acpi_add_cpu_info, opaque); + return 0; +} + +static void acpi_get_cpu_info(AcpiCpuInfo *cpu) +{ + Object *root = object_get_root(); + + memset(cpu->found_cpus, 0, sizeof cpu->found_cpus); + object_child_foreach(root, acpi_add_cpu_info, cpu); +} + +static void acpi_get_pm_info(AcpiPmInfo *pm) +{ + Object *piix = piix4_pm_find(); + Object *lpc = ich9_lpc_find(); + Object *obj = NULL; + QObject *o; + + pm->pcihp_io_base = 0; + pm->pcihp_io_len = 0; + if (piix) { + obj = piix; + pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; + pm->pcihp_io_base = + object_property_get_int(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); + pm->pcihp_io_len = + object_property_get_int(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); + } + if (lpc) { + obj = lpc; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + } + assert(obj); + + pm->cpu_hp_io_len = ACPI_GPE_PROC_LEN; + pm->mem_hp_io_base = ACPI_MEMORY_HOTPLUG_BASE; + pm->mem_hp_io_len = ACPI_MEMORY_HOTPLUG_IO_LEN; + + /* Fill in optional s3/s4 related properties */ + o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); + if (o) { + pm->s3_disabled = qint_get_int(qobject_to_qint(o)); + } else { + pm->s3_disabled = false; + } + qobject_decref(o); + o = object_property_get_qobject(obj, ACPI_PM_PROP_S4_DISABLED, NULL); + if (o) { + pm->s4_disabled = qint_get_int(qobject_to_qint(o)); + } else { + pm->s4_disabled = false; + } + qobject_decref(o); + o = object_property_get_qobject(obj, ACPI_PM_PROP_S4_VAL, NULL); + if (o) { + pm->s4_val = qint_get_int(qobject_to_qint(o)); + } else { + pm->s4_val = false; + } + qobject_decref(o); + + /* Fill in mandatory properties */ + pm->sci_int = object_property_get_int(obj, ACPI_PM_PROP_SCI_INT, NULL); + + pm->acpi_enable_cmd = object_property_get_int(obj, + ACPI_PM_PROP_ACPI_ENABLE_CMD, + NULL); + pm->acpi_disable_cmd = object_property_get_int(obj, + ACPI_PM_PROP_ACPI_DISABLE_CMD, + NULL); + pm->io_base = object_property_get_int(obj, ACPI_PM_PROP_PM_IO_BASE, + NULL); + pm->gpe0_blk = object_property_get_int(obj, ACPI_PM_PROP_GPE0_BLK, + NULL); + pm->gpe0_blk_len = object_property_get_int(obj, ACPI_PM_PROP_GPE0_BLK_LEN, + NULL); + pm->pcihp_bridge_en = + object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", + NULL); +} + +static void acpi_get_misc_info(AcpiMiscInfo *info) +{ + info->has_hpet = hpet_find(); + info->tpm_version = tpm_get_version(); + info->pvpanic_port = pvpanic_port(); + info->applesmc_io_base = applesmc_port(); +} + +/* + * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. + * On i386 arch we only have two pci hosts, so we can look only for them. + */ +static Object *acpi_get_i386_pci_host(void) +{ + PCIHostState *host; + + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/i440fx", NULL), + TYPE_PCI_HOST_BRIDGE); + if (!host) { + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/q35", NULL), + TYPE_PCI_HOST_BRIDGE); + } + + return OBJECT(host); +} + +static void acpi_get_pci_info(PcPciInfo *info) +{ + Object *pci_host; + + + pci_host = acpi_get_i386_pci_host(); + g_assert(pci_host); + + info->w32.begin = object_property_get_int(pci_host, + PCI_HOST_PROP_PCI_HOLE_START, + NULL); + info->w32.end = object_property_get_int(pci_host, + PCI_HOST_PROP_PCI_HOLE_END, + NULL); + info->w64.begin = object_property_get_int(pci_host, + PCI_HOST_PROP_PCI_HOLE64_START, + NULL); + info->w64.end = object_property_get_int(pci_host, + PCI_HOST_PROP_PCI_HOLE64_END, + NULL); +} + +#define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ + +static void acpi_align_size(GArray *blob, unsigned align) +{ + /* Align size to multiple of given size. This reduces the chance + * we need to change size in the future (breaking cross version migration). + */ + g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); +} + +/* FACS */ +static void +build_facs(GArray *table_data, GArray *linker, PcGuestInfo *guest_info) +{ + AcpiFacsDescriptorRev1 *facs = acpi_data_push(table_data, sizeof *facs); + memcpy(&facs->signature, "FACS", 4); + facs->length = cpu_to_le32(sizeof(*facs)); +} + +/* Load chipset information in FADT */ +static void fadt_setup(AcpiFadtDescriptorRev1 *fadt, AcpiPmInfo *pm) +{ + fadt->model = 1; + fadt->reserved1 = 0; + fadt->sci_int = cpu_to_le16(pm->sci_int); + fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD); + fadt->acpi_enable = pm->acpi_enable_cmd; + fadt->acpi_disable = pm->acpi_disable_cmd; + /* EVT, CNT, TMR offset matches hw/acpi/core.c */ + fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base); + fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04); + fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08); + fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk); + /* EVT, CNT, TMR length matches hw/acpi/core.c */ + fadt->pm1_evt_len = 4; + fadt->pm1_cnt_len = 2; + fadt->pm_tmr_len = 4; + fadt->gpe0_blk_len = pm->gpe0_blk_len; + fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */ + fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */ + fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) | + (1 << ACPI_FADT_F_PROC_C1) | + (1 << ACPI_FADT_F_SLP_BUTTON) | + (1 << ACPI_FADT_F_RTC_S4)); + fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK); + /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs + * For more than 8 CPUs, "Clustered Logical" mode has to be used + */ + if (max_cpus > 8) { + fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL); + } +} + + +/* FADT */ +static void +build_fadt(GArray *table_data, GArray *linker, AcpiPmInfo *pm, + unsigned facs, unsigned dsdt) +{ + AcpiFadtDescriptorRev1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); + + fadt->firmware_ctrl = cpu_to_le32(facs); + /* FACS address to be filled by Guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + ACPI_BUILD_TABLE_FILE, + table_data, &fadt->firmware_ctrl, + sizeof fadt->firmware_ctrl); + + fadt->dsdt = cpu_to_le32(dsdt); + /* DSDT address to be filled by Guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + ACPI_BUILD_TABLE_FILE, + table_data, &fadt->dsdt, + sizeof fadt->dsdt); + + fadt_setup(fadt, pm); + + build_header(linker, table_data, + (void *)fadt, "FACP", sizeof(*fadt), 1); +} + +static void +build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, + PcGuestInfo *guest_info) +{ + int madt_start = table_data->len; + + AcpiMultipleApicTable *madt; + AcpiMadtIoApic *io_apic; + AcpiMadtIntsrcovr *intsrcovr; + AcpiMadtLocalNmi *local_nmi; + int i; + + madt = acpi_data_push(table_data, sizeof *madt); + madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); + madt->flags = cpu_to_le32(1); + + for (i = 0; i < guest_info->apic_id_limit; i++) { + AcpiMadtProcessorApic *apic = acpi_data_push(table_data, sizeof *apic); + apic->type = ACPI_APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = i; + apic->local_apic_id = i; + if (test_bit(i, cpu->found_cpus)) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } + } + io_apic = acpi_data_push(table_data, sizeof *io_apic); + io_apic->type = ACPI_APIC_IO; + io_apic->length = sizeof(*io_apic); +#define ACPI_BUILD_IOAPIC_ID 0x0 + io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; + io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); + io_apic->interrupt = cpu_to_le32(0); + + if (guest_info->apic_xrupt_override) { + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = 0; + intsrcovr->gsi = cpu_to_le32(2); + intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ + } + for (i = 1; i < 16; i++) { +#define ACPI_BUILD_PCI_IRQS ((1<<5) | (1<<9) | (1<<10) | (1<<11)) + if (!(ACPI_BUILD_PCI_IRQS & (1 << i))) { + /* No need for a INT source override structure. */ + continue; + } + intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); + intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; + intsrcovr->length = sizeof(*intsrcovr); + intsrcovr->source = i; + intsrcovr->gsi = cpu_to_le32(i); + intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ + } + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->processor_id = 0xff; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + + build_header(linker, table_data, + (void *)(table_data->data + madt_start), "APIC", + table_data->len - madt_start, 1); +} + +/* Assign BSEL property to all buses. In the future, this can be changed + * to only assign to buses that support hotplug. + */ +static void *acpi_set_bsel(PCIBus *bus, void *opaque) +{ + unsigned *bsel_alloc = opaque; + unsigned *bus_bsel; + + if (qbus_is_hotpluggable(BUS(bus))) { + bus_bsel = g_malloc(sizeof *bus_bsel); + + *bus_bsel = (*bsel_alloc)++; + object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, + bus_bsel, NULL); + } + + return bsel_alloc; +} + +static void acpi_set_pci_info(void) +{ + PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ + unsigned bsel_alloc = 0; + + if (bus) { + /* Scan all PCI buses. Set property to enable acpi based hotplug. */ + pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); + } +} + +static void build_append_pcihp_notify_entry(Aml *method, int slot) +{ + Aml *if_ctx; + int32_t devfn = PCI_DEVFN(slot, 0); + + if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot))); + aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1))); + aml_append(method, if_ctx); +} + +static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, + bool pcihp_bridge_en) +{ + Aml *dev, *notify_method, *method; + QObject *bsel; + PCIBus *sec; + int i; + + bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); + if (bsel) { + int64_t bsel_val = qint_get_int(qobject_to_qint(bsel)); + + aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val))); + notify_method = aml_method("DVNT", 2); + } + + for (i = 0; i < ARRAY_SIZE(bus->devices); i += PCI_FUNC_MAX) { + DeviceClass *dc; + PCIDeviceClass *pc; + PCIDevice *pdev = bus->devices[i]; + int slot = PCI_SLOT(i); + bool hotplug_enabled_dev; + bool bridge_in_acpi; + + if (!pdev) { + if (bsel) { /* add hotplug slots for non present devices */ + dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + method = aml_method("_EJ0", 1); + aml_append(method, + aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) + ); + aml_append(dev, method); + aml_append(parent_scope, dev); + + build_append_pcihp_notify_entry(notify_method, slot); + } + continue; + } + + pc = PCI_DEVICE_GET_CLASS(pdev); + dc = DEVICE_GET_CLASS(pdev); + + /* When hotplug for bridges is enabled, bridges are + * described in ACPI separately (see build_pci_bus_end). + * In this case they aren't themselves hot-pluggable. + * Hotplugged bridges *are* hot-pluggable. + */ + bridge_in_acpi = pc->is_bridge && pcihp_bridge_en && + !DEVICE(pdev)->hotplugged; + + hotplug_enabled_dev = bsel && dc->hotpluggable && !bridge_in_acpi; + + if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { + continue; + } + + /* start to compose PCI slot descriptor */ + dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); + aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + + if (pc->class_id == PCI_CLASS_DISPLAY_VGA) { + /* add VGA specific AML methods */ + int s3d; + + if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) { + s3d = 3; + } else { + s3d = 0; + } + + method = aml_method("_S1D", 0); + aml_append(method, aml_return(aml_int(0))); + aml_append(dev, method); + + method = aml_method("_S2D", 0); + aml_append(method, aml_return(aml_int(0))); + aml_append(dev, method); + + method = aml_method("_S3D", 0); + aml_append(method, aml_return(aml_int(s3d))); + aml_append(dev, method); + } else if (hotplug_enabled_dev) { + /* add _SUN/_EJ0 to make slot hotpluggable */ + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + + method = aml_method("_EJ0", 1); + aml_append(method, + aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) + ); + aml_append(dev, method); + + if (bsel) { + build_append_pcihp_notify_entry(notify_method, slot); + } + } else if (bridge_in_acpi) { + /* + * device is coldplugged bridge, + * add child device descriptions into its scope + */ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + + build_append_pci_bus_devices(dev, sec_bus, pcihp_bridge_en); + } + /* slot descriptor has been composed, add it into parent context */ + aml_append(parent_scope, dev); + } + + if (bsel) { + aml_append(parent_scope, notify_method); + } + + /* Append PCNT method to notify about events on local and child buses. + * Add unconditionally for root since DSDT expects it. + */ + method = aml_method("PCNT", 0); + + /* If bus supports hotplug select it and notify about local events */ + if (bsel) { + int64_t bsel_val = qint_get_int(qobject_to_qint(bsel)); + aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM"))); + aml_append(method, + aml_call2("DVNT", aml_name("PCIU"), aml_int(1) /* Device Check */) + ); + aml_append(method, + aml_call2("DVNT", aml_name("PCID"), aml_int(3)/* Eject Request */) + ); + } + + /* Notify about child bus events in any case */ + if (pcihp_bridge_en) { + QLIST_FOREACH(sec, &bus->child, sibling) { + int32_t devfn = sec->parent_dev->devfn; + + aml_append(method, aml_name("^S%.02X.PCNT", devfn)); + } + } + aml_append(parent_scope, method); + qobject_decref(bsel); +} + +/* + * initialize_route - Initialize the interrupt routing rule + * through a specific LINK: + * if (lnk_idx == idx) + * route using link 'link_name' + */ +static Aml *initialize_route(Aml *route, const char *link_name, + Aml *lnk_idx, int idx) +{ + Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx))); + Aml *pkg = aml_package(4); + + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_name("%s", link_name)); + aml_append(pkg, aml_int(0)); + aml_append(if_ctx, aml_store(pkg, route)); + + return if_ctx; +} + +/* + * build_prt - Define interrupt rounting rules + * + * Returns an array of 128 routes, one for each device, + * based on device location. + * The main goal is to equaly distribute the interrupts + * over the 4 existing ACPI links (works only for i440fx). + * The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]". + * + */ +static Aml *build_prt(void) +{ + Aml *method, *while_ctx, *pin, *res; + + method = aml_method("_PRT", 0); + res = aml_local(0); + pin = aml_local(1); + aml_append(method, aml_store(aml_package(128), res)); + aml_append(method, aml_store(aml_int(0), pin)); + + /* while (pin < 128) */ + while_ctx = aml_while(aml_lless(pin, aml_int(128))); + { + Aml *slot = aml_local(2); + Aml *lnk_idx = aml_local(3); + Aml *route = aml_local(4); + + /* slot = pin >> 2 */ + aml_append(while_ctx, + aml_store(aml_shiftright(pin, aml_int(2)), slot)); + /* lnk_idx = (slot + pin) & 3 */ + aml_append(while_ctx, + aml_store(aml_and(aml_add(pin, slot), aml_int(3)), lnk_idx)); + + /* route[2] = "LNK[D|A|B|C]", selection based on pin % 3 */ + aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0)); + aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1)); + aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2)); + aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3)); + + /* route[0] = 0x[slot]FFFF */ + aml_append(while_ctx, + aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), aml_int(0xFFFF)), + aml_index(route, aml_int(0)))); + /* route[1] = pin & 3 */ + aml_append(while_ctx, + aml_store(aml_and(pin, aml_int(3)), aml_index(route, aml_int(1)))); + /* res[pin] = route */ + aml_append(while_ctx, aml_store(route, aml_index(res, pin))); + /* pin++ */ + aml_append(while_ctx, aml_increment(pin)); + } + aml_append(method, while_ctx); + /* return res*/ + aml_append(method, aml_return(res)); + + return method; +} + +typedef struct CrsRangeEntry { + uint64_t base; + uint64_t limit; +} CrsRangeEntry; + +static void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit) +{ + CrsRangeEntry *entry; + + entry = g_malloc(sizeof(*entry)); + entry->base = base; + entry->limit = limit; + + g_ptr_array_add(ranges, entry); +} + +static void crs_range_free(gpointer data) +{ + CrsRangeEntry *entry = (CrsRangeEntry *)data; + g_free(entry); +} + +static gint crs_range_compare(gconstpointer a, gconstpointer b) +{ + CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; + CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + + return (int64_t)entry_a->base - (int64_t)entry_b->base; +} + +/* + * crs_replace_with_free_ranges - given the 'used' ranges within [start - end] + * interval, computes the 'free' ranges from the same interval. + * Example: If the input array is { [a1 - a2],[b1 - b2] }, the function + * will return { [base - a1], [a2 - b1], [b2 - limit] }. + */ +static void crs_replace_with_free_ranges(GPtrArray *ranges, + uint64_t start, uint64_t end) +{ + GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free); + uint64_t free_base = start; + int i; + + g_ptr_array_sort(ranges, crs_range_compare); + for (i = 0; i < ranges->len; i++) { + CrsRangeEntry *used = g_ptr_array_index(ranges, i); + + if (free_base < used->base) { + crs_range_insert(free_ranges, free_base, used->base - 1); + } + + free_base = used->limit + 1; + } + + if (free_base < end) { + crs_range_insert(free_ranges, free_base, end); + } + + g_ptr_array_set_size(ranges, 0); + for (i = 0; i < free_ranges->len; i++) { + g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i)); + } + + g_ptr_array_free(free_ranges, false); +} + +static Aml *build_crs(PCIHostState *host, + GPtrArray *io_ranges, GPtrArray *mem_ranges) +{ + Aml *crs = aml_resource_template(); + uint8_t max_bus = pci_bus_num(host->bus); + uint8_t type; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) { + int i; + uint64_t range_base, range_limit; + PCIDevice *dev = host->bus->devices[devfn]; + + if (!dev) { + continue; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *r = &dev->io_regions[i]; + + range_base = r->addr; + range_limit = r->addr + r->size - 1; + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (!range_base || range_base > range_limit) { + continue; + } + + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } else { /* "memory" */ + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + + type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + if (type == PCI_HEADER_TYPE_BRIDGE) { + uint8_t subordinate = dev->config[PCI_SUBORDINATE_BUS]; + if (subordinate > max_bus) { + max_bus = subordinate; + } + + range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); + range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base && range_base <= range_limit) { + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base && range_base <= range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base && range_base <= range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + } + + aml_append(crs, + aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, + 0, + pci_bus_num(host->bus), + max_bus, + 0, + max_bus - pci_bus_num(host->bus) + 1)); + + return crs; +} + +static void +build_ssdt(GArray *table_data, GArray *linker, + AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, + PcPciInfo *pci, PcGuestInfo *guest_info) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + uint32_t nr_mem = machine->ram_slots; + unsigned acpi_cpus = guest_info->apic_id_limit; + Aml *ssdt, *sb_scope, *scope, *pkg, *dev, *method, *crs, *field, *ifctx; + PCIBus *bus = NULL; + GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); + GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); + CrsRangeEntry *entry; + int root_bus_limit = 0xFF; + int i; + + ssdt = init_aml_allocator(); + /* The current AML generator can cover the APIC ID range [0..255], + * inclusive, for VCPU hotplug. */ + QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); + g_assert(acpi_cpus <= ACPI_CPU_HOTPLUG_ID_LIMIT); + + /* Reserve space for header */ + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); + + /* Extra PCI root buses are implemented only for i440fx */ + bus = find_i440fx(); + if (bus) { + QLIST_FOREACH(bus, &bus->child, sibling) { + uint8_t bus_num = pci_bus_num(bus); + uint8_t numa_node = pci_bus_numa_node(bus); + + /* look only for expander root buses */ + if (!pci_bus_is_root(bus)) { + continue; + } + + if (bus_num < root_bus_limit) { + root_bus_limit = bus_num - 1; + } + + scope = aml_scope("\\_SB"); + dev = aml_device("PC%.02X", bus_num); + aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); + aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + + if (numa_node != NUMA_NODE_UNASSIGNED) { + aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); + } + + aml_append(dev, build_prt()); + crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), + io_ranges, mem_ranges); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + aml_append(ssdt, scope); + } + } + + scope = aml_scope("\\_SB.PCI0"); + /* build PCI0._CRS */ + crs = aml_resource_template(); + aml_append(crs, + aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, + 0x0000, 0x0, root_bus_limit, + 0x0000, root_bus_limit + 1)); + aml_append(crs, aml_io(AML_DECODE16, 0x0CF8, 0x0CF8, 0x01, 0x08)); + + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8)); + + crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF); + for (i = 0; i < io_ranges->len; i++) { + entry = g_ptr_array_index(io_ranges, i); + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0x0000, entry->base, entry->limit, + 0x0000, entry->limit - entry->base + 1)); + } + + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_WRITE, + 0, 0x000A0000, 0x000BFFFF, 0, 0x00020000)); + + crs_replace_with_free_ranges(mem_ranges, pci->w32.begin, pci->w32.end - 1); + for (i = 0; i < mem_ranges->len; i++) { + entry = g_ptr_array_index(mem_ranges, i); + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, + 0, entry->base, entry->limit, + 0, entry->limit - entry->base + 1)); + } + + if (pci->w64.begin) { + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_WRITE, + 0, pci->w64.begin, pci->w64.end - 1, 0, + pci->w64.end - pci->w64.begin)); + } + aml_append(scope, aml_name_decl("_CRS", crs)); + + /* reserve GPE0 block resources */ + dev = aml_device("GPE0"); + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); + aml_append(dev, aml_name_decl("_UID", aml_string("GPE0 resources"))); + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len) + ); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + + g_ptr_array_free(io_ranges, true); + g_ptr_array_free(mem_ranges, true); + + /* reserve PCIHP resources */ + if (pm->pcihp_io_len) { + dev = aml_device("PHPR"); + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06"))); + aml_append(dev, + aml_name_decl("_UID", aml_string("PCI Hotplug resources"))); + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, pm->pcihp_io_base, pm->pcihp_io_base, 1, + pm->pcihp_io_len) + ); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } + aml_append(ssdt, scope); + + /* create S3_ / S4_ / S5_ packages if necessary */ + scope = aml_scope("\\"); + if (!pm->s3_disabled) { + pkg = aml_package(4); + aml_append(pkg, aml_int(1)); /* PM1a_CNT.SLP_TYP */ + aml_append(pkg, aml_int(1)); /* PM1b_CNT.SLP_TYP, FIXME: not impl. */ + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(scope, aml_name_decl("_S3", pkg)); + } + + if (!pm->s4_disabled) { + pkg = aml_package(4); + aml_append(pkg, aml_int(pm->s4_val)); /* PM1a_CNT.SLP_TYP */ + /* PM1b_CNT.SLP_TYP, FIXME: not impl. */ + aml_append(pkg, aml_int(pm->s4_val)); + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(scope, aml_name_decl("_S4", pkg)); + } + + pkg = aml_package(4); + aml_append(pkg, aml_int(0)); /* PM1a_CNT.SLP_TYP */ + aml_append(pkg, aml_int(0)); /* PM1b_CNT.SLP_TYP not impl. */ + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(pkg, aml_int(0)); /* reserved */ + aml_append(scope, aml_name_decl("_S5", pkg)); + aml_append(ssdt, scope); + + if (misc->applesmc_io_base) { + scope = aml_scope("\\_SB.PCI0.ISA"); + dev = aml_device("SMC"); + + aml_append(dev, aml_name_decl("_HID", aml_eisaid("APP0001"))); + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, misc->applesmc_io_base, misc->applesmc_io_base, + 0x01, APPLESMC_MAX_DATA_LENGTH) + ); + aml_append(crs, aml_irq_no_flags(6)); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(scope, dev); + aml_append(ssdt, scope); + } + + if (misc->pvpanic_port) { + scope = aml_scope("\\_SB.PCI0.ISA"); + + dev = aml_device("PEVT"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0001"))); + + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, misc->pvpanic_port, misc->pvpanic_port, 1, 1) + ); + aml_append(dev, aml_name_decl("_CRS", crs)); + + aml_append(dev, aml_operation_region("PEOR", AML_SYSTEM_IO, + misc->pvpanic_port, 1)); + field = aml_field("PEOR", AML_BYTE_ACC, AML_PRESERVE); + aml_append(field, aml_named_field("PEPT", 8)); + aml_append(dev, field); + + /* device present, functioning, decoding, shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xF))); + + method = aml_method("RDPT", 0); + aml_append(method, aml_store(aml_name("PEPT"), aml_local(0))); + aml_append(method, aml_return(aml_local(0))); + aml_append(dev, method); + + method = aml_method("WRPT", 1); + aml_append(method, aml_store(aml_arg(0), aml_name("PEPT"))); + aml_append(dev, method); + + aml_append(scope, dev); + aml_append(ssdt, scope); + } + + sb_scope = aml_scope("\\_SB"); + { + /* create PCI0.PRES device and its _CRS to reserve CPU hotplug MMIO */ + dev = aml_device("PCI0." stringify(CPU_HOTPLUG_RESOURCE_DEVICE)); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A06"))); + aml_append(dev, + aml_name_decl("_UID", aml_string("CPU Hotplug resources")) + ); + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, pm->cpu_hp_io_base, pm->cpu_hp_io_base, 1, + pm->cpu_hp_io_len) + ); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(sb_scope, dev); + /* declare CPU hotplug MMIO region and PRS field to access it */ + aml_append(sb_scope, aml_operation_region( + "PRST", AML_SYSTEM_IO, pm->cpu_hp_io_base, pm->cpu_hp_io_len)); + field = aml_field("PRST", AML_BYTE_ACC, AML_PRESERVE); + aml_append(field, aml_named_field("PRS", 256)); + aml_append(sb_scope, field); + + /* build Processor object for each processor */ + for (i = 0; i < acpi_cpus; i++) { + dev = aml_processor(i, 0, 0, "CP%.02X", i); + + method = aml_method("_MAT", 0); + aml_append(method, aml_return(aml_call1("CPMA", aml_int(i)))); + aml_append(dev, method); + + method = aml_method("_STA", 0); + aml_append(method, aml_return(aml_call1("CPST", aml_int(i)))); + aml_append(dev, method); + + method = aml_method("_EJ0", 1); + aml_append(method, + aml_return(aml_call2("CPEJ", aml_int(i), aml_arg(0))) + ); + aml_append(dev, method); + + aml_append(sb_scope, dev); + } + + /* build this code: + * Method(NTFY, 2) {If (LEqual(Arg0, 0x00)) {Notify(CP00, Arg1)} ...} + */ + /* Arg0 = Processor ID = APIC ID */ + method = aml_method("NTFY", 2); + for (i = 0; i < acpi_cpus; i++) { + ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i))); + aml_append(ifctx, + aml_notify(aml_name("CP%.02X", i), aml_arg(1)) + ); + aml_append(method, ifctx); + } + aml_append(sb_scope, method); + + /* build "Name(CPON, Package() { One, One, ..., Zero, Zero, ... })" + * + * Note: The ability to create variable-sized packages was first + * introduced in ACPI 2.0. ACPI 1.0 only allowed fixed-size packages + * ith up to 255 elements. Windows guests up to win2k8 fail when + * VarPackageOp is used. + */ + pkg = acpi_cpus <= 255 ? aml_package(acpi_cpus) : + aml_varpackage(acpi_cpus); + + for (i = 0; i < acpi_cpus; i++) { + uint8_t b = test_bit(i, cpu->found_cpus) ? 0x01 : 0x00; + aml_append(pkg, aml_int(b)); + } + aml_append(sb_scope, aml_name_decl("CPON", pkg)); + + /* build memory devices */ + assert(nr_mem <= ACPI_MAX_RAM_SLOTS); + scope = aml_scope("\\_SB.PCI0." stringify(MEMORY_HOTPLUG_DEVICE)); + aml_append(scope, + aml_name_decl(stringify(MEMORY_SLOTS_NUMBER), aml_int(nr_mem)) + ); + + crs = aml_resource_template(); + aml_append(crs, + aml_io(AML_DECODE16, pm->mem_hp_io_base, pm->mem_hp_io_base, 0, + pm->mem_hp_io_len) + ); + aml_append(scope, aml_name_decl("_CRS", crs)); + + aml_append(scope, aml_operation_region( + stringify(MEMORY_HOTPLUG_IO_REGION), AML_SYSTEM_IO, + pm->mem_hp_io_base, pm->mem_hp_io_len) + ); + + field = aml_field(stringify(MEMORY_HOTPLUG_IO_REGION), AML_DWORD_ACC, + AML_PRESERVE); + aml_append(field, /* read only */ + aml_named_field(stringify(MEMORY_SLOT_ADDR_LOW), 32)); + aml_append(field, /* read only */ + aml_named_field(stringify(MEMORY_SLOT_ADDR_HIGH), 32)); + aml_append(field, /* read only */ + aml_named_field(stringify(MEMORY_SLOT_SIZE_LOW), 32)); + aml_append(field, /* read only */ + aml_named_field(stringify(MEMORY_SLOT_SIZE_HIGH), 32)); + aml_append(field, /* read only */ + aml_named_field(stringify(MEMORY_SLOT_PROXIMITY), 32)); + aml_append(scope, field); + + field = aml_field(stringify(MEMORY_HOTPLUG_IO_REGION), AML_BYTE_ACC, + AML_WRITE_AS_ZEROS); + aml_append(field, aml_reserved_field(160 /* bits, Offset(20) */)); + aml_append(field, /* 1 if enabled, read only */ + aml_named_field(stringify(MEMORY_SLOT_ENABLED), 1)); + aml_append(field, + /*(read) 1 if has a insert event. (write) 1 to clear event */ + aml_named_field(stringify(MEMORY_SLOT_INSERT_EVENT), 1)); + aml_append(field, + /* (read) 1 if has a remove event. (write) 1 to clear event */ + aml_named_field(stringify(MEMORY_SLOT_REMOVE_EVENT), 1)); + aml_append(field, + /* initiates device eject, write only */ + aml_named_field(stringify(MEMORY_SLOT_EJECT), 1)); + aml_append(scope, field); + + field = aml_field(stringify(MEMORY_HOTPLUG_IO_REGION), AML_DWORD_ACC, + AML_PRESERVE); + aml_append(field, /* DIMM selector, write only */ + aml_named_field(stringify(MEMORY_SLOT_SLECTOR), 32)); + aml_append(field, /* _OST event code, write only */ + aml_named_field(stringify(MEMORY_SLOT_OST_EVENT), 32)); + aml_append(field, /* _OST status code, write only */ + aml_named_field(stringify(MEMORY_SLOT_OST_STATUS), 32)); + aml_append(scope, field); + + aml_append(sb_scope, scope); + + for (i = 0; i < nr_mem; i++) { + #define BASEPATH "\\_SB.PCI0." stringify(MEMORY_HOTPLUG_DEVICE) "." + const char *s; + + dev = aml_device("MP%02X", i); + aml_append(dev, aml_name_decl("_UID", aml_string("0x%02X", i))); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C80"))); + + method = aml_method("_CRS", 0); + s = BASEPATH stringify(MEMORY_SLOT_CRS_METHOD); + aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); + aml_append(dev, method); + + method = aml_method("_STA", 0); + s = BASEPATH stringify(MEMORY_SLOT_STATUS_METHOD); + aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); + aml_append(dev, method); + + method = aml_method("_PXM", 0); + s = BASEPATH stringify(MEMORY_SLOT_PROXIMITY_METHOD); + aml_append(method, aml_return(aml_call1(s, aml_name("_UID")))); + aml_append(dev, method); + + method = aml_method("_OST", 3); + s = BASEPATH stringify(MEMORY_SLOT_OST_METHOD); + aml_append(method, aml_return(aml_call4( + s, aml_name("_UID"), aml_arg(0), aml_arg(1), aml_arg(2) + ))); + aml_append(dev, method); + + method = aml_method("_EJ0", 1); + s = BASEPATH stringify(MEMORY_SLOT_EJECT_METHOD); + aml_append(method, aml_return(aml_call2( + s, aml_name("_UID"), aml_arg(0)))); + aml_append(dev, method); + + aml_append(sb_scope, dev); + } + + /* build Method(MEMORY_SLOT_NOTIFY_METHOD, 2) { + * If (LEqual(Arg0, 0x00)) {Notify(MP00, Arg1)} ... } + */ + method = aml_method(stringify(MEMORY_SLOT_NOTIFY_METHOD), 2); + for (i = 0; i < nr_mem; i++) { + ifctx = aml_if(aml_equal(aml_arg(0), aml_int(i))); + aml_append(ifctx, + aml_notify(aml_name("MP%.02X", i), aml_arg(1)) + ); + aml_append(method, ifctx); + } + aml_append(sb_scope, method); + + { + Object *pci_host; + PCIBus *bus = NULL; + + pci_host = acpi_get_i386_pci_host(); + if (pci_host) { + bus = PCI_HOST_BRIDGE(pci_host)->bus; + } + + if (bus) { + Aml *scope = aml_scope("PCI0"); + /* Scan all PCI buses. Generate tables to support hotplug. */ + build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); + + if (misc->tpm_version != TPM_VERSION_UNSPEC) { + dev = aml_device("ISA.TPM"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31"))); + aml_append(dev, aml_name_decl("_STA", aml_int(0xF))); + crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, + TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); + aml_append(crs, aml_irq_no_flags(TPM_TIS_IRQ)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } + + aml_append(sb_scope, scope); + } + } + aml_append(ssdt, sb_scope); + } + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + build_header(linker, table_data, + (void *)(table_data->data + table_data->len - ssdt->buf->len), + "SSDT", ssdt->buf->len, 1); + free_aml_allocator(); +} + +static void +build_hpet(GArray *table_data, GArray *linker) +{ + Acpi20Hpet *hpet; + + hpet = acpi_data_push(table_data, sizeof(*hpet)); + /* Note timer_block_id value must be kept in sync with value advertised by + * emulated hpet + */ + hpet->timer_block_id = cpu_to_le32(0x8086a201); + hpet->addr.address = cpu_to_le64(HPET_BASE); + build_header(linker, table_data, + (void *)hpet, "HPET", sizeof(*hpet), 1); +} + +static void +build_tpm_tcpa(GArray *table_data, GArray *linker, GArray *tcpalog) +{ + Acpi20Tcpa *tcpa = acpi_data_push(table_data, sizeof *tcpa); + uint64_t log_area_start_address = acpi_data_len(tcpalog); + + tcpa->platform_class = cpu_to_le16(TPM_TCPA_ACPI_CLASS_CLIENT); + tcpa->log_area_minimum_length = cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); + tcpa->log_area_start_address = cpu_to_le64(log_area_start_address); + + bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, 1, + false /* high memory */); + + /* log area start address to be filled by Guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + ACPI_BUILD_TPMLOG_FILE, + table_data, &tcpa->log_area_start_address, + sizeof(tcpa->log_area_start_address)); + + build_header(linker, table_data, + (void *)tcpa, "TCPA", sizeof(*tcpa), 2); + + acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); +} + +static void +build_tpm2(GArray *table_data, GArray *linker) +{ + Acpi20TPM2 *tpm2_ptr; + + tpm2_ptr = acpi_data_push(table_data, sizeof *tpm2_ptr); + + tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); + tpm2_ptr->control_area_address = cpu_to_le64(0); + tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); + + build_header(linker, table_data, + (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4); +} + +typedef enum { + MEM_AFFINITY_NOFLAGS = 0, + MEM_AFFINITY_ENABLED = (1 << 0), + MEM_AFFINITY_HOTPLUGGABLE = (1 << 1), + MEM_AFFINITY_NON_VOLATILE = (1 << 2), +} MemoryAffinityFlags; + +static void +acpi_build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, + uint64_t len, int node, MemoryAffinityFlags flags) +{ + numamem->type = ACPI_SRAT_MEMORY; + numamem->length = sizeof(*numamem); + memset(numamem->proximity, 0, 4); + numamem->proximity[0] = node; + numamem->flags = cpu_to_le32(flags); + numamem->base_addr = cpu_to_le64(base); + numamem->range_length = cpu_to_le64(len); +} + +static void +build_srat(GArray *table_data, GArray *linker, PcGuestInfo *guest_info) +{ + AcpiSystemResourceAffinityTable *srat; + AcpiSratProcessorAffinity *core; + AcpiSratMemoryAffinity *numamem; + + int i; + uint64_t curnode; + int srat_start, numa_start, slots; + uint64_t mem_len, mem_base, next_base; + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + ram_addr_t hotplugabble_address_space_size = + object_property_get_int(OBJECT(pcms), PC_MACHINE_MEMHP_REGION_SIZE, + NULL); + + srat_start = table_data->len; + + srat = acpi_data_push(table_data, sizeof *srat); + srat->reserved1 = cpu_to_le32(1); + core = (void *)(srat + 1); + + for (i = 0; i < guest_info->apic_id_limit; ++i) { + core = acpi_data_push(table_data, sizeof *core); + core->type = ACPI_SRAT_PROCESSOR; + core->length = sizeof(*core); + core->local_apic_id = i; + curnode = guest_info->node_cpu[i]; + core->proximity_lo = curnode; + memset(core->proximity_hi, 0, 3); + core->local_sapic_eid = 0; + core->flags = cpu_to_le32(1); + } + + + /* the memory map is a bit tricky, it contains at least one hole + * from 640k-1M and possibly another one from 3.5G-4G. + */ + next_base = 0; + numa_start = table_data->len; + + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, 0, 640*1024, 0, MEM_AFFINITY_ENABLED); + next_base = 1024 * 1024; + for (i = 1; i < guest_info->numa_nodes + 1; ++i) { + mem_base = next_base; + mem_len = guest_info->node_mem[i - 1]; + if (i == 1) { + mem_len -= 1024 * 1024; + } + next_base = mem_base + mem_len; + + /* Cut out the ACPI_PCI hole */ + if (mem_base <= guest_info->ram_size_below_4g && + next_base > guest_info->ram_size_below_4g) { + mem_len -= next_base - guest_info->ram_size_below_4g; + if (mem_len > 0) { + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); + } + mem_base = 1ULL << 32; + mem_len = next_base - guest_info->ram_size_below_4g; + next_base += (1ULL << 32) - guest_info->ram_size_below_4g; + } + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, mem_base, mem_len, i - 1, + MEM_AFFINITY_ENABLED); + } + slots = (table_data->len - numa_start) / sizeof *numamem; + for (; slots < guest_info->numa_nodes + 2; slots++) { + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); + } + + /* + * Entry is required for Windows to enable memory hotplug in OS. + * Memory devices may override proximity set by this entry, + * providing _PXM method if necessary. + */ + if (hotplugabble_address_space_size) { + numamem = acpi_data_push(table_data, sizeof *numamem); + acpi_build_srat_memory(numamem, pcms->hotplug_memory.base, + hotplugabble_address_space_size, 0, + MEM_AFFINITY_HOTPLUGGABLE | + MEM_AFFINITY_ENABLED); + } + + build_header(linker, table_data, + (void *)(table_data->data + srat_start), + "SRAT", + table_data->len - srat_start, 1); +} + +static void +build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) +{ + AcpiTableMcfg *mcfg; + const char *sig; + int len = sizeof(*mcfg) + 1 * sizeof(mcfg->allocation[0]); + + mcfg = acpi_data_push(table_data, len); + mcfg->allocation[0].address = cpu_to_le64(info->mcfg_base); + /* Only a single allocation so no need to play with segments */ + mcfg->allocation[0].pci_segment = cpu_to_le16(0); + mcfg->allocation[0].start_bus_number = 0; + mcfg->allocation[0].end_bus_number = PCIE_MMCFG_BUS(info->mcfg_size - 1); + + /* MCFG is used for ECAM which can be enabled or disabled by guest. + * To avoid table size changes (which create migration issues), + * always create the table even if there are no allocations, + * but set the signature to a reserved value in this case. + * ACPI spec requires OSPMs to ignore such tables. + */ + if (info->mcfg_base == PCIE_BASE_ADDR_UNMAPPED) { + /* Reserved signature: ignored by OSPM */ + sig = "QEMU"; + } else { + sig = "MCFG"; + } + build_header(linker, table_data, (void *)mcfg, sig, len, 1); +} + +static void +build_dmar_q35(GArray *table_data, GArray *linker) +{ + int dmar_start = table_data->len; + + AcpiTableDmar *dmar; + AcpiDmarHardwareUnit *drhd; + + dmar = acpi_data_push(table_data, sizeof(*dmar)); + dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; + dmar->flags = 0; /* No intr_remap for now */ + + /* DMAR Remapping Hardware Unit Definition structure */ + drhd = acpi_data_push(table_data, sizeof(*drhd)); + drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); + drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ + drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; + drhd->pci_segment = cpu_to_le16(0); + drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + + build_header(linker, table_data, (void *)(table_data->data + dmar_start), + "DMAR", table_data->len - dmar_start, 1); +} + +static void +build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) +{ + AcpiTableHeader *dsdt; + + assert(misc->dsdt_code && misc->dsdt_size); + + dsdt = acpi_data_push(table_data, misc->dsdt_size); + memcpy(dsdt, misc->dsdt_code, misc->dsdt_size); + + memset(dsdt, 0, sizeof *dsdt); + build_header(linker, table_data, dsdt, "DSDT", + misc->dsdt_size, 1); +} + +static GArray * +build_rsdp(GArray *rsdp_table, GArray *linker, unsigned rsdt) +{ + AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); + + bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 16, + true /* fseg memory */); + + memcpy(&rsdp->signature, "RSD PTR ", 8); + memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6); + rsdp->rsdt_physical_address = cpu_to_le32(rsdt); + /* Address to be filled by Guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + ACPI_BUILD_TABLE_FILE, + rsdp_table, &rsdp->rsdt_physical_address, + sizeof rsdp->rsdt_physical_address); + rsdp->checksum = 0; + /* Checksum to be filled by Guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + rsdp, rsdp, sizeof *rsdp, &rsdp->checksum); + + return rsdp_table; +} + +typedef +struct AcpiBuildState { + /* Copy of table in RAM (for patching). */ + MemoryRegion *table_mr; + /* Is table patched? */ + uint8_t patched; + PcGuestInfo *guest_info; + void *rsdp; + MemoryRegion *rsdp_mr; + MemoryRegion *linker_mr; +} AcpiBuildState; + +static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) +{ + Object *pci_host; + QObject *o; + + pci_host = acpi_get_i386_pci_host(); + g_assert(pci_host); + + o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); + if (!o) { + return false; + } + mcfg->mcfg_base = qint_get_int(qobject_to_qint(o)); + qobject_decref(o); + + o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_SIZE, NULL); + assert(o); + mcfg->mcfg_size = qint_get_int(qobject_to_qint(o)); + qobject_decref(o); + return true; +} + +static bool acpi_has_iommu(void) +{ + bool ambiguous; + Object *intel_iommu; + + intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, + &ambiguous); + return intel_iommu && !ambiguous; +} + +static +void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) +{ + GArray *table_offsets; + unsigned facs, ssdt, dsdt, rsdt; + AcpiCpuInfo cpu; + AcpiPmInfo pm; + AcpiMiscInfo misc; + AcpiMcfgInfo mcfg; + PcPciInfo pci; + uint8_t *u; + size_t aml_len = 0; + GArray *tables_blob = tables->table_data; + + acpi_get_cpu_info(&cpu); + acpi_get_pm_info(&pm); + acpi_get_dsdt(&misc); + acpi_get_misc_info(&misc); + acpi_get_pci_info(&pci); + + table_offsets = g_array_new(false, true /* clear */, + sizeof(uint32_t)); + ACPI_BUILD_DPRINTF("init ACPI tables\n"); + + bios_linker_loader_alloc(tables->linker, ACPI_BUILD_TABLE_FILE, + 64 /* Ensure FACS is aligned */, + false /* high memory */); + + /* + * FACS is pointed to by FADT. + * We place it first since it's the only table that has alignment + * requirements. + */ + facs = tables_blob->len; + build_facs(tables_blob, tables->linker, guest_info); + + /* DSDT is pointed to by FADT */ + dsdt = tables_blob->len; + build_dsdt(tables_blob, tables->linker, &misc); + + /* Count the size of the DSDT and SSDT, we will need it for legacy + * sizing of ACPI tables. + */ + aml_len += tables_blob->len - dsdt; + + /* ACPI tables pointed to by RSDT */ + acpi_add_table(table_offsets, tables_blob); + build_fadt(tables_blob, tables->linker, &pm, facs, dsdt); + + ssdt = tables_blob->len; + acpi_add_table(table_offsets, tables_blob); + build_ssdt(tables_blob, tables->linker, &cpu, &pm, &misc, &pci, + guest_info); + aml_len += tables_blob->len - ssdt; + + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, &cpu, guest_info); + + if (misc.has_hpet) { + acpi_add_table(table_offsets, tables_blob); + build_hpet(tables_blob, tables->linker); + } + if (misc.tpm_version != TPM_VERSION_UNSPEC) { + acpi_add_table(table_offsets, tables_blob); + build_tpm_tcpa(tables_blob, tables->linker, tables->tcpalog); + + if (misc.tpm_version == TPM_VERSION_2_0) { + acpi_add_table(table_offsets, tables_blob); + build_tpm2(tables_blob, tables->linker); + } + } + if (guest_info->numa_nodes) { + acpi_add_table(table_offsets, tables_blob); + build_srat(tables_blob, tables->linker, guest_info); + } + if (acpi_get_mcfg(&mcfg)) { + acpi_add_table(table_offsets, tables_blob); + build_mcfg_q35(tables_blob, tables->linker, &mcfg); + } + if (acpi_has_iommu()) { + acpi_add_table(table_offsets, tables_blob); + build_dmar_q35(tables_blob, tables->linker); + } + + /* Add tables supplied by user (if any) */ + for (u = acpi_table_first(); u; u = acpi_table_next(u)) { + unsigned len = acpi_table_len(u); + + acpi_add_table(table_offsets, tables_blob); + g_array_append_vals(tables_blob, u, len); + } + + /* RSDT is pointed to by RSDP */ + rsdt = tables_blob->len; + build_rsdt(tables_blob, tables->linker, table_offsets); + + /* RSDP is in FSEG memory, so allocate it separately */ + build_rsdp(tables->rsdp, tables->linker, rsdt); + + /* We'll expose it all to Guest so we want to reduce + * chance of size changes. + * + * We used to align the tables to 4k, but of course this would + * too simple to be enough. 4k turned out to be too small an + * alignment very soon, and in fact it is almost impossible to + * keep the table size stable for all (max_cpus, max_memory_slots) + * combinations. So the table size is always 64k for pc-i440fx-2.1 + * and we give an error if the table grows beyond that limit. + * + * We still have the problem of migrating from "-M pc-i440fx-2.0". For + * that, we exploit the fact that QEMU 2.1 generates _smaller_ tables + * than 2.0 and we can always pad the smaller tables with zeros. We can + * then use the exact size of the 2.0 tables. + * + * All this is for PIIX4, since QEMU 2.0 didn't support Q35 migration. + */ + if (guest_info->legacy_acpi_table_size) { + /* Subtracting aml_len gives the size of fixed tables. Then add the + * size of the PIIX4 DSDT/SSDT in QEMU 2.0. + */ + int legacy_aml_len = + guest_info->legacy_acpi_table_size + + ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus; + int legacy_table_size = + ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, + ACPI_BUILD_ALIGN_SIZE); + if (tables_blob->len > legacy_table_size) { + /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ + error_report("Warning: migration may not work."); + } + g_array_set_size(tables_blob, legacy_table_size); + } else { + /* Make sure we have a buffer in case we need to resize the tables. */ + if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { + /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. */ + error_report("Warning: ACPI tables are larger than 64k."); + error_report("Warning: migration may not work."); + error_report("Warning: please remove CPUs, NUMA nodes, " + "memory slots or PCI bridges."); + } + acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE); + } + + acpi_align_size(tables->linker, ACPI_BUILD_ALIGN_SIZE); + + /* Cleanup memory that's no longer used. */ + g_array_free(table_offsets, true); +} + +static void acpi_ram_update(MemoryRegion *mr, GArray *data) +{ + uint32_t size = acpi_data_len(data); + + /* Make sure RAM size is correct - in case it got changed e.g. by migration */ + memory_region_ram_resize(mr, size, &error_abort); + + memcpy(memory_region_get_ram_ptr(mr), data->data, size); + memory_region_set_dirty(mr, 0, size); +} + +static void acpi_build_update(void *build_opaque, uint32_t offset) +{ + AcpiBuildState *build_state = build_opaque; + AcpiBuildTables tables; + + /* No state to update or already patched? Nothing to do. */ + if (!build_state || build_state->patched) { + return; + } + build_state->patched = 1; + + acpi_build_tables_init(&tables); + + acpi_build(build_state->guest_info, &tables); + + acpi_ram_update(build_state->table_mr, tables.table_data); + + if (build_state->rsdp) { + memcpy(build_state->rsdp, tables.rsdp->data, acpi_data_len(tables.rsdp)); + } else { + acpi_ram_update(build_state->rsdp_mr, tables.rsdp); + } + + acpi_ram_update(build_state->linker_mr, tables.linker); + acpi_build_tables_cleanup(&tables, true); +} + +static void acpi_build_reset(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + build_state->patched = 0; +} + +static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, + GArray *blob, const char *name, + uint64_t max_size) +{ + return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, + name, acpi_build_update, build_state); +} + +static const VMStateDescription vmstate_acpi_build = { + .name = "acpi_build", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(patched, AcpiBuildState), + VMSTATE_END_OF_LIST() + }, +}; + +void acpi_setup(PcGuestInfo *guest_info) +{ + AcpiBuildTables tables; + AcpiBuildState *build_state; + + if (!guest_info->fw_cfg) { + ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); + return; + } + + if (!guest_info->has_acpi_build) { + ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); + return; + } + + if (!acpi_enabled) { + ACPI_BUILD_DPRINTF("ACPI disabled. Bailing out.\n"); + return; + } + + build_state = g_malloc0(sizeof *build_state); + + build_state->guest_info = guest_info; + + acpi_set_pci_info(); + + acpi_build_tables_init(&tables); + acpi_build(build_state->guest_info, &tables); + + /* Now expose it all to Guest */ + build_state->table_mr = acpi_add_rom_blob(build_state, tables.table_data, + ACPI_BUILD_TABLE_FILE, + ACPI_BUILD_TABLE_MAX_SIZE); + assert(build_state->table_mr != NULL); + + build_state->linker_mr = + acpi_add_rom_blob(build_state, tables.linker, "etc/table-loader", 0); + + fw_cfg_add_file(guest_info->fw_cfg, ACPI_BUILD_TPMLOG_FILE, + tables.tcpalog->data, acpi_data_len(tables.tcpalog)); + + if (!guest_info->rsdp_in_ram) { + /* + * Keep for compatibility with old machine types. + * Though RSDP is small, its contents isn't immutable, so + * we'll update it along with the rest of tables on guest access. + */ + uint32_t rsdp_size = acpi_data_len(tables.rsdp); + + build_state->rsdp = g_memdup(tables.rsdp->data, rsdp_size); + fw_cfg_add_file_callback(guest_info->fw_cfg, ACPI_BUILD_RSDP_FILE, + acpi_build_update, build_state, + build_state->rsdp, rsdp_size); + build_state->rsdp_mr = NULL; + } else { + build_state->rsdp = NULL; + build_state->rsdp_mr = acpi_add_rom_blob(build_state, tables.rsdp, + ACPI_BUILD_RSDP_FILE, 0); + } + + qemu_register_reset(acpi_build_reset, build_state); + acpi_build_reset(build_state); + vmstate_register(NULL, 0, &vmstate_acpi_build, build_state); + + /* Cleanup tables but don't free the memory: we track it + * in build_state. + */ + acpi_build_tables_cleanup(&tables, false); +} diff --git a/qemu/hw/i386/acpi-build.h b/qemu/hw/i386/acpi-build.h new file mode 100644 index 000000000..e57b1aafd --- /dev/null +++ b/qemu/hw/i386/acpi-build.h @@ -0,0 +1,9 @@ + +#ifndef HW_I386_ACPI_BUILD_H +#define HW_I386_ACPI_BUILD_H + +#include "qemu/typedefs.h" + +void acpi_setup(PcGuestInfo *); + +#endif diff --git a/qemu/hw/i386/acpi-dsdt-cpu-hotplug.dsl b/qemu/hw/i386/acpi-dsdt-cpu-hotplug.dsl new file mode 100644 index 000000000..1aff74627 --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt-cpu-hotplug.dsl @@ -0,0 +1,90 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/**************************************************************** + * CPU hotplug + ****************************************************************/ + +Scope(\_SB) { + /* Objects filled in by run-time generated SSDT */ + External(NTFY, MethodObj) + External(CPON, PkgObj) + External(PRS, FieldUnitObj) + + /* Methods called by run-time generated SSDT Processor objects */ + Method(CPMA, 1, NotSerialized) { + // _MAT method - create an madt apic buffer + // Arg0 = Processor ID = Local APIC ID + // Local0 = CPON flag for this cpu + Store(DerefOf(Index(CPON, Arg0)), Local0) + // Local1 = Buffer (in madt apic form) to return + Store(Buffer(8) {0x00, 0x08, 0x00, 0x00, 0x00, 0, 0, 0}, Local1) + // Update the processor id, lapic id, and enable/disable status + Store(Arg0, Index(Local1, 2)) + Store(Arg0, Index(Local1, 3)) + Store(Local0, Index(Local1, 4)) + Return (Local1) + } + Method(CPST, 1, NotSerialized) { + // _STA method - return ON status of cpu + // Arg0 = Processor ID = Local APIC ID + // Local0 = CPON flag for this cpu + Store(DerefOf(Index(CPON, Arg0)), Local0) + If (Local0) { + Return (0xF) + } Else { + Return (0x0) + } + } + Method(CPEJ, 2, NotSerialized) { + // _EJ0 method - eject callback + Sleep(200) + } + +#define CPU_STATUS_LEN ACPI_GPE_PROC_LEN + Method(PRSC, 0) { + // Local5 = active cpu bitmap + Store(PRS, Local5) + // Local2 = last read byte from bitmap + Store(Zero, Local2) + // Local0 = Processor ID / APIC ID iterator + Store(Zero, Local0) + While (LLess(Local0, SizeOf(CPON))) { + // Local1 = CPON flag for this cpu + Store(DerefOf(Index(CPON, Local0)), Local1) + If (And(Local0, 0x07)) { + // Shift down previously read bitmap byte + ShiftRight(Local2, 1, Local2) + } Else { + // Read next byte from cpu bitmap + Store(DerefOf(Index(Local5, ShiftRight(Local0, 3))), Local2) + } + // Local3 = active state for this cpu + Store(And(Local2, 1), Local3) + + If (LNotEqual(Local1, Local3)) { + // State change - update CPON with new state + Store(Local3, Index(CPON, Local0)) + // Do CPU notify + If (LEqual(Local3, 1)) { + NTFY(Local0, 1) + } Else { + NTFY(Local0, 3) + } + } + Increment(Local0) + } + } +} diff --git a/qemu/hw/i386/acpi-dsdt-dbug.dsl b/qemu/hw/i386/acpi-dsdt-dbug.dsl new file mode 100644 index 000000000..86230f75a --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt-dbug.dsl @@ -0,0 +1,41 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/**************************************************************** + * Debugging + ****************************************************************/ + +Scope(\) { + /* Debug Output */ + OperationRegion(DBG, SystemIO, 0x0402, 0x01) + Field(DBG, ByteAcc, NoLock, Preserve) { + DBGB, 8, + } + + /* Debug method - use this method to send output to the QEMU + * BIOS debug port. This method handles strings, integers, + * and buffers. For example: DBUG("abc") DBUG(0x123) */ + Method(DBUG, 1) { + ToHexString(Arg0, Local0) + ToBuffer(Local0, Local0) + Subtract(SizeOf(Local0), 1, Local1) + Store(Zero, Local2) + While (LLess(Local2, Local1)) { + Store(DerefOf(Index(Local0, Local2)), DBGB) + Increment(Local2) + } + Store(0x0A, DBGB) + } +} diff --git a/qemu/hw/i386/acpi-dsdt-hpet.dsl b/qemu/hw/i386/acpi-dsdt-hpet.dsl new file mode 100644 index 000000000..44961b87a --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt-hpet.dsl @@ -0,0 +1,48 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/**************************************************************** + * HPET + ****************************************************************/ + +Scope(\_SB) { + Device(HPET) { + Name(_HID, EISAID("PNP0103")) + Name(_UID, 0) + OperationRegion(HPTM, SystemMemory, 0xFED00000, 0x400) + Field(HPTM, DWordAcc, Lock, Preserve) { + VEND, 32, + PRD, 32, + } + Method(_STA, 0, NotSerialized) { + Store(VEND, Local0) + Store(PRD, Local1) + ShiftRight(Local0, 16, Local0) + If (LOr(LEqual(Local0, 0), LEqual(Local0, 0xffff))) { + Return (0x0) + } + If (LOr(LEqual(Local1, 0), LGreater(Local1, 100000000))) { + Return (0x0) + } + Return (0x0F) + } + Name(_CRS, ResourceTemplate() { + Memory32Fixed(ReadOnly, + 0xFED00000, // Address Base + 0x00000400, // Address Length + ) + }) + } +} diff --git a/qemu/hw/i386/acpi-dsdt-isa.dsl b/qemu/hw/i386/acpi-dsdt-isa.dsl new file mode 100644 index 000000000..89caa1649 --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt-isa.dsl @@ -0,0 +1,117 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* Common legacy ISA style devices. */ +Scope(\_SB.PCI0.ISA) { + + Device(RTC) { + Name(_HID, EisaId("PNP0B00")) + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x0070, 0x0070, 0x10, 0x02) + IRQNoFlags() { 8 } + IO(Decode16, 0x0072, 0x0072, 0x02, 0x06) + }) + } + + Device(KBD) { + Name(_HID, EisaId("PNP0303")) + Method(_STA, 0, NotSerialized) { + Return (0x0f) + } + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x0060, 0x0060, 0x01, 0x01) + IO(Decode16, 0x0064, 0x0064, 0x01, 0x01) + IRQNoFlags() { 1 } + }) + } + + Device(MOU) { + Name(_HID, EisaId("PNP0F13")) + Method(_STA, 0, NotSerialized) { + Return (0x0f) + } + Name(_CRS, ResourceTemplate() { + IRQNoFlags() { 12 } + }) + } + + Device(FDC0) { + Name(_HID, EisaId("PNP0700")) + Method(_STA, 0, NotSerialized) { + Store(FDEN, Local0) + If (LEqual(Local0, 0)) { + Return (0x00) + } Else { + Return (0x0F) + } + } + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x03F2, 0x03F2, 0x00, 0x04) + IO(Decode16, 0x03F7, 0x03F7, 0x00, 0x01) + IRQNoFlags() { 6 } + DMA(Compatibility, NotBusMaster, Transfer8) { 2 } + }) + } + + Device(LPT) { + Name(_HID, EisaId("PNP0400")) + Method(_STA, 0, NotSerialized) { + Store(LPEN, Local0) + If (LEqual(Local0, 0)) { + Return (0x00) + } Else { + Return (0x0F) + } + } + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x0378, 0x0378, 0x08, 0x08) + IRQNoFlags() { 7 } + }) + } + + Device(COM1) { + Name(_HID, EisaId("PNP0501")) + Name(_UID, 0x01) + Method(_STA, 0, NotSerialized) { + Store(CAEN, Local0) + If (LEqual(Local0, 0)) { + Return (0x00) + } Else { + Return (0x0F) + } + } + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x03F8, 0x03F8, 0x00, 0x08) + IRQNoFlags() { 4 } + }) + } + + Device(COM2) { + Name(_HID, EisaId("PNP0501")) + Name(_UID, 0x02) + Method(_STA, 0, NotSerialized) { + Store(CBEN, Local0) + If (LEqual(Local0, 0)) { + Return (0x00) + } Else { + Return (0x0F) + } + } + Name(_CRS, ResourceTemplate() { + IO(Decode16, 0x02F8, 0x02F8, 0x00, 0x08) + IRQNoFlags() { 3 } + }) + } +} diff --git a/qemu/hw/i386/acpi-dsdt-mem-hotplug.dsl b/qemu/hw/i386/acpi-dsdt-mem-hotplug.dsl new file mode 100644 index 000000000..c2bb6a160 --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt-mem-hotplug.dsl @@ -0,0 +1,171 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj) + + Scope(\_SB.PCI0) { + Device(MEMORY_HOTPLUG_DEVICE) { + Name(_HID, "PNP0A06") + Name(_UID, "Memory hotplug resources") + External(MEMORY_SLOTS_NUMBER, IntObj) + + /* Memory hotplug IO registers */ + External(MEMORY_SLOT_ADDR_LOW, FieldUnitObj) // read only + External(MEMORY_SLOT_ADDR_HIGH, FieldUnitObj) // read only + External(MEMORY_SLOT_SIZE_LOW, FieldUnitObj) // read only + External(MEMORY_SLOT_SIZE_HIGH, FieldUnitObj) // read only + External(MEMORY_SLOT_PROXIMITY, FieldUnitObj) // read only + External(MEMORY_SLOT_ENABLED, FieldUnitObj) // 1 if enabled, read only + External(MEMORY_SLOT_INSERT_EVENT, FieldUnitObj) // (read) 1 if has a insert event. (write) 1 to clear event + External(MEMORY_SLOT_REMOVE_EVENT, FieldUnitObj) // (read) 1 if has a remove event. (write) 1 to clear event + External(MEMORY_SLOT_EJECT, FieldUnitObj) // initiates device eject, write only + External(MEMORY_SLOT_SLECTOR, FieldUnitObj) // DIMM selector, write only + External(MEMORY_SLOT_OST_EVENT, FieldUnitObj) // _OST event code, write only + External(MEMORY_SLOT_OST_STATUS, FieldUnitObj) // _OST status code, write only + + Method(_STA, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(0x0) + } + /* present, functioning, decoding, not shown in UI */ + Return(0xB) + } + + Mutex (MEMORY_SLOT_LOCK, 0) + + Method(MEMORY_SLOT_SCAN_METHOD, 0) { + If (LEqual(MEMORY_SLOTS_NUMBER, Zero)) { + Return(Zero) + } + + Store(Zero, Local0) // Mem devs iterrator + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + while (LLess(Local0, MEMORY_SLOTS_NUMBER)) { + Store(Local0, MEMORY_SLOT_SLECTOR) // select Local0 DIMM + If (LEqual(MEMORY_SLOT_INSERT_EVENT, One)) { // Memory device needs check + MEMORY_SLOT_NOTIFY_METHOD(Local0, 1) + Store(1, MEMORY_SLOT_INSERT_EVENT) + } Elseif (LEqual(MEMORY_SLOT_REMOVE_EVENT, One)) { // Ejection request + MEMORY_SLOT_NOTIFY_METHOD(Local0, 3) + Store(1, MEMORY_SLOT_REMOVE_EVENT) + } + Add(Local0, One, Local0) // goto next DIMM + } + Release(MEMORY_SLOT_LOCK) + Return(One) + } + + Method(MEMORY_SLOT_STATUS_METHOD, 1) { + Store(Zero, Local0) + + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + If (LEqual(MEMORY_SLOT_ENABLED, One)) { + Store(0xF, Local0) + } + + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_CRS_METHOD, 1, Serialized) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + + Name(MR64, ResourceTemplate() { + QWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x0000000000000000, // Address Space Granularity + 0x0000000000000000, // Address Range Minimum + 0xFFFFFFFFFFFFFFFE, // Address Range Maximum + 0x0000000000000000, // Address Translation Offset + 0xFFFFFFFFFFFFFFFF, // Address Length + ,, MW64, AddressRangeMemory, TypeStatic) + }) + + CreateDWordField(MR64, 14, MINL) + CreateDWordField(MR64, 18, MINH) + CreateDWordField(MR64, 38, LENL) + CreateDWordField(MR64, 42, LENH) + CreateDWordField(MR64, 22, MAXL) + CreateDWordField(MR64, 26, MAXH) + + Store(MEMORY_SLOT_ADDR_HIGH, MINH) + Store(MEMORY_SLOT_ADDR_LOW, MINL) + Store(MEMORY_SLOT_SIZE_HIGH, LENH) + Store(MEMORY_SLOT_SIZE_LOW, LENL) + + // 64-bit math: MAX = MIN + LEN - 1 + Add(MINL, LENL, MAXL) + Add(MINH, LENH, MAXH) + If (LLess(MAXL, MINL)) { + Add(MAXH, One, MAXH) + } + If (LLess(MAXL, One)) { + Subtract(MAXH, One, MAXH) + } + Subtract(MAXL, One, MAXL) + + If (LEqual(MAXH, Zero)){ + Name(MR32, ResourceTemplate() { + DWordMemory(ResourceProducer, PosDecode, MinFixed, MaxFixed, + Cacheable, ReadWrite, + 0x00000000, // Address Space Granularity + 0x00000000, // Address Range Minimum + 0xFFFFFFFE, // Address Range Maximum + 0x00000000, // Address Translation Offset + 0xFFFFFFFF, // Address Length + ,, MW32, AddressRangeMemory, TypeStatic) + }) + CreateDWordField(MR32, MW32._MIN, MIN) + CreateDWordField(MR32, MW32._MAX, MAX) + CreateDWordField(MR32, MW32._LEN, LEN) + Store(MINL, MIN) + Store(MAXL, MAX) + Store(LENL, LEN) + + Release(MEMORY_SLOT_LOCK) + Return(MR32) + } + + Release(MEMORY_SLOT_LOCK) + Return(MR64) + } + + Method(MEMORY_SLOT_PROXIMITY_METHOD, 1) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(MEMORY_SLOT_PROXIMITY, Local0) + Release(MEMORY_SLOT_LOCK) + Return(Local0) + } + + Method(MEMORY_SLOT_OST_METHOD, 4) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(Arg1, MEMORY_SLOT_OST_EVENT) + Store(Arg2, MEMORY_SLOT_OST_STATUS) + Release(MEMORY_SLOT_LOCK) + } + + Method(MEMORY_SLOT_EJECT_METHOD, 2) { + Acquire(MEMORY_SLOT_LOCK, 0xFFFF) + Store(ToInteger(Arg0), MEMORY_SLOT_SLECTOR) // select DIMM + Store(1, MEMORY_SLOT_EJECT) + Release(MEMORY_SLOT_LOCK) + } + } // Device() + } // Scope() diff --git a/qemu/hw/i386/acpi-dsdt.dsl b/qemu/hw/i386/acpi-dsdt.dsl new file mode 100644 index 000000000..a2d84ecf8 --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt.dsl @@ -0,0 +1,304 @@ +/* + * Bochs/QEMU ACPI DSDT ASL definition + * + * Copyright (c) 2006 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +ACPI_EXTRACT_ALL_CODE AcpiDsdtAmlCode + +DefinitionBlock ( + "acpi-dsdt.aml", // Output Filename + "DSDT", // Signature + 0x01, // DSDT Compliance Revision + "BXPC", // OEMID + "BXDSDT", // TABLE ID + 0x1 // OEM Revision + ) +{ + +#include "acpi-dsdt-dbug.dsl" + + Scope(\_SB) { + Device(PCI0) { + Name(_HID, EisaId("PNP0A03")) + Name(_ADR, 0x00) + Name(_UID, 1) +//#define PX13 S0B_ +// External(PX13, DeviceObj) + } + } + +#include "acpi-dsdt-hpet.dsl" + + +/**************************************************************** + * PIIX4 PM + ****************************************************************/ + + Scope(\_SB.PCI0) { + Device(PX13) { + Name(_ADR, 0x00010003) + OperationRegion(P13C, PCI_Config, 0x00, 0xff) + } + } + + +/**************************************************************** + * PIIX3 ISA bridge + ****************************************************************/ + + Scope(\_SB.PCI0) { + + External(ISA, DeviceObj) + + Device(ISA) { + Name(_ADR, 0x00010000) + + /* PIIX PCI to ISA irq remapping */ + OperationRegion(P40C, PCI_Config, 0x60, 0x04) + + /* enable bits */ + Field(\_SB.PCI0.PX13.P13C, AnyAcc, NoLock, Preserve) { + Offset(0x5f), + , 7, + LPEN, 1, // LPT + Offset(0x67), + , 3, + CAEN, 1, // COM1 + , 3, + CBEN, 1, // COM2 + } + Name(FDEN, 1) + } + } + +#include "acpi-dsdt-isa.dsl" + + +/**************************************************************** + * PCI hotplug + ****************************************************************/ + + Scope(\_SB.PCI0) { + OperationRegion(PCST, SystemIO, 0xae00, 0x08) + Field(PCST, DWordAcc, NoLock, WriteAsZeros) { + PCIU, 32, + PCID, 32, + } + + OperationRegion(SEJ, SystemIO, 0xae08, 0x04) + Field(SEJ, DWordAcc, NoLock, WriteAsZeros) { + B0EJ, 32, + } + + OperationRegion(BNMR, SystemIO, 0xae10, 0x04) + Field(BNMR, DWordAcc, NoLock, WriteAsZeros) { + BNUM, 32, + } + + /* Lock to protect access to fields above. */ + Mutex(BLCK, 0) + + /* Methods called by bulk generated PCI devices below */ + + /* Methods called by hotplug devices */ + Method(PCEJ, 2, NotSerialized) { + // _EJ0 method - eject callback + Acquire(BLCK, 0xFFFF) + Store(Arg0, BNUM) + Store(ShiftLeft(1, Arg1), B0EJ) + Release(BLCK) + Return (0x0) + } + + /* Hotplug notification method supplied by SSDT */ + External(\_SB.PCI0.PCNT, MethodObj) + } + + +/**************************************************************** + * PCI IRQs + ****************************************************************/ + + Scope(\_SB) { + Scope(PCI0) { + Method (_PRT, 0) { + Store(Package(128) {}, Local0) + Store(Zero, Local1) + While(LLess(Local1, 128)) { + // slot = pin >> 2 + Store(ShiftRight(Local1, 2), Local2) + + // lnk = (slot + pin) & 3 + Store(And(Add(Local1, Local2), 3), Local3) + If (LEqual(Local3, 0)) { + Store(Package(4) { Zero, Zero, LNKD, Zero }, Local4) + } + If (LEqual(Local3, 1)) { + // device 1 is the power-management device, needs SCI + If (LEqual(Local1, 4)) { + Store(Package(4) { Zero, Zero, LNKS, Zero }, Local4) + } Else { + Store(Package(4) { Zero, Zero, LNKA, Zero }, Local4) + } + } + If (LEqual(Local3, 2)) { + Store(Package(4) { Zero, Zero, LNKB, Zero }, Local4) + } + If (LEqual(Local3, 3)) { + Store(Package(4) { Zero, Zero, LNKC, Zero }, Local4) + } + + // Complete the interrupt routing entry: + // Package(4) { 0x[slot]FFFF, [pin], [link], 0) } + + Store(Or(ShiftLeft(Local2, 16), 0xFFFF), Index(Local4, 0)) + Store(And(Local1, 3), Index(Local4, 1)) + Store(Local4, Index(Local0, Local1)) + + Increment(Local1) + } + + Return(Local0) + } + } + + Field(PCI0.ISA.P40C, ByteAcc, NoLock, Preserve) { + PRQ0, 8, + PRQ1, 8, + PRQ2, 8, + PRQ3, 8 + } + + Method(IQST, 1, NotSerialized) { + // _STA method - get status + If (And(0x80, Arg0)) { + Return (0x09) + } + Return (0x0B) + } + Method(IQCR, 1, Serialized) { + // _CRS method - get current settings + Name(PRR0, ResourceTemplate() { + Interrupt(, Level, ActiveHigh, Shared) { 0 } + }) + CreateDWordField(PRR0, 0x05, PRRI) + If (LLess(Arg0, 0x80)) { + Store(Arg0, PRRI) + } + Return (PRR0) + } + +#define define_link(link, uid, reg) \ + Device(link) { \ + Name(_HID, EISAID("PNP0C0F")) \ + Name(_UID, uid) \ + Name(_PRS, ResourceTemplate() { \ + Interrupt(, Level, ActiveHigh, Shared) { \ + 5, 10, 11 \ + } \ + }) \ + Method(_STA, 0, NotSerialized) { \ + Return (IQST(reg)) \ + } \ + Method(_DIS, 0, NotSerialized) { \ + Or(reg, 0x80, reg) \ + } \ + Method(_CRS, 0, NotSerialized) { \ + Return (IQCR(reg)) \ + } \ + Method(_SRS, 1, NotSerialized) { \ + CreateDWordField(Arg0, 0x05, PRRI) \ + Store(PRRI, reg) \ + } \ + } + + define_link(LNKA, 0, PRQ0) + define_link(LNKB, 1, PRQ1) + define_link(LNKC, 2, PRQ2) + define_link(LNKD, 3, PRQ3) + + Device(LNKS) { + Name(_HID, EISAID("PNP0C0F")) + Name(_UID, 4) + Name(_PRS, ResourceTemplate() { + Interrupt(, Level, ActiveHigh, Shared) { 9 } + }) + + // The SCI cannot be disabled and is always attached to GSI 9, + // so these are no-ops. We only need this link to override the + // polarity to active high and match the content of the MADT. + Method(_STA, 0, NotSerialized) { Return (0x0b) } + Method(_DIS, 0, NotSerialized) { } + Method(_CRS, 0, NotSerialized) { Return (_PRS) } + Method(_SRS, 1, NotSerialized) { } + } + } + +#include "hw/acpi/pc-hotplug.h" +#define CPU_STATUS_BASE PIIX4_CPU_HOTPLUG_IO_BASE +#include "acpi-dsdt-cpu-hotplug.dsl" +#include "acpi-dsdt-mem-hotplug.dsl" + + +/**************************************************************** + * General purpose events + ****************************************************************/ + Scope(\_GPE) { + Name(_HID, "ACPI0006") + + Method(_L00) { + } + Method(_E01) { + // PCI hotplug event + Acquire(\_SB.PCI0.BLCK, 0xFFFF) + \_SB.PCI0.PCNT() + Release(\_SB.PCI0.BLCK) + } + Method(_E02) { + // CPU hotplug event + \_SB.PRSC() + } + Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() + } + Method(_L04) { + } + Method(_L05) { + } + Method(_L06) { + } + Method(_L07) { + } + Method(_L08) { + } + Method(_L09) { + } + Method(_L0A) { + } + Method(_L0B) { + } + Method(_L0C) { + } + Method(_L0D) { + } + Method(_L0E) { + } + Method(_L0F) { + } + } +} diff --git a/qemu/hw/i386/acpi-dsdt.hex.generated b/qemu/hw/i386/acpi-dsdt.hex.generated new file mode 100644 index 000000000..ecaa4a548 --- /dev/null +++ b/qemu/hw/i386/acpi-dsdt.hex.generated @@ -0,0 +1,2972 @@ +static unsigned char AcpiDsdtAmlCode[] = { +0x44, +0x53, +0x44, +0x54, +0x9a, +0xb, +0x0, +0x0, +0x1, +0xf8, +0x42, +0x58, +0x50, +0x43, +0x0, +0x0, +0x42, +0x58, +0x44, +0x53, +0x44, +0x54, +0x0, +0x0, +0x1, +0x0, +0x0, +0x0, +0x49, +0x4e, +0x54, +0x4c, +0x7, +0x11, +0x14, +0x20, +0x10, +0x49, +0x4, +0x5c, +0x0, +0x5b, +0x80, +0x44, +0x42, +0x47, +0x5f, +0x1, +0xb, +0x2, +0x4, +0x1, +0x5b, +0x81, +0xb, +0x44, +0x42, +0x47, +0x5f, +0x1, +0x44, +0x42, +0x47, +0x42, +0x8, +0x14, +0x2c, +0x44, +0x42, +0x55, +0x47, +0x1, +0x98, +0x68, +0x60, +0x96, +0x60, +0x60, +0x74, +0x87, +0x60, +0x1, +0x61, +0x70, +0x0, +0x62, +0xa2, +0x10, +0x95, +0x62, +0x61, +0x70, +0x83, +0x88, +0x60, +0x62, +0x0, +0x44, +0x42, +0x47, +0x42, +0x75, +0x62, +0x70, +0xa, +0xa, +0x44, +0x42, +0x47, +0x42, +0x10, +0x22, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x1b, +0x50, +0x43, +0x49, +0x30, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xa, +0x3, +0x8, +0x5f, +0x41, +0x44, +0x52, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x10, +0x4d, +0x8, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x45, +0x8, +0x48, +0x50, +0x45, +0x54, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x1, +0x3, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x5b, +0x80, +0x48, +0x50, +0x54, +0x4d, +0x0, +0xc, +0x0, +0x0, +0xd0, +0xfe, +0xb, +0x0, +0x4, +0x5b, +0x81, +0x10, +0x48, +0x50, +0x54, +0x4d, +0x13, +0x56, +0x45, +0x4e, +0x44, +0x20, +0x50, +0x52, +0x44, +0x5f, +0x20, +0x14, +0x36, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x56, +0x45, +0x4e, +0x44, +0x60, +0x70, +0x50, +0x52, +0x44, +0x5f, +0x61, +0x7a, +0x60, +0xa, +0x10, +0x60, +0xa0, +0xc, +0x91, +0x93, +0x60, +0x0, +0x93, +0x60, +0xb, +0xff, +0xff, +0xa4, +0x0, +0xa0, +0xe, +0x91, +0x93, +0x61, +0x0, +0x94, +0x61, +0xc, +0x0, +0xe1, +0xf5, +0x5, +0xa4, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x11, +0xa, +0xe, +0x86, +0x9, +0x0, +0x0, +0x0, +0x0, +0xd0, +0xfe, +0x0, +0x4, +0x0, +0x0, +0x79, +0x0, +0x10, +0x25, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x19, +0x50, +0x58, +0x31, +0x33, +0x8, +0x5f, +0x41, +0x44, +0x52, +0xc, +0x3, +0x0, +0x1, +0x0, +0x5b, +0x80, +0x50, +0x31, +0x33, +0x43, +0x2, +0x0, +0xa, +0xff, +0x10, +0x46, +0x5, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x49, +0x4, +0x49, +0x53, +0x41, +0x5f, +0x8, +0x5f, +0x41, +0x44, +0x52, +0xc, +0x0, +0x0, +0x1, +0x0, +0x5b, +0x80, +0x50, +0x34, +0x30, +0x43, +0x2, +0xa, +0x60, +0xa, +0x4, +0x5b, +0x81, +0x26, +0x5e, +0x2e, +0x50, +0x58, +0x31, +0x33, +0x50, +0x31, +0x33, +0x43, +0x0, +0x0, +0x48, +0x2f, +0x0, +0x7, +0x4c, +0x50, +0x45, +0x4e, +0x1, +0x0, +0x38, +0x0, +0x3, +0x43, +0x41, +0x45, +0x4e, +0x1, +0x0, +0x3, +0x43, +0x42, +0x45, +0x4e, +0x1, +0x8, +0x46, +0x44, +0x45, +0x4e, +0x1, +0x10, +0x4c, +0x1b, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x49, +0x53, +0x41, +0x5f, +0x5b, +0x82, +0x2d, +0x52, +0x54, +0x43, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xb, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x18, +0xa, +0x15, +0x47, +0x1, +0x70, +0x0, +0x70, +0x0, +0x10, +0x2, +0x22, +0x0, +0x1, +0x47, +0x1, +0x72, +0x0, +0x72, +0x0, +0x2, +0x6, +0x79, +0x0, +0x5b, +0x82, +0x37, +0x4b, +0x42, +0x44, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x3, +0x3, +0x14, +0x9, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x18, +0xa, +0x15, +0x47, +0x1, +0x60, +0x0, +0x60, +0x0, +0x1, +0x1, +0x47, +0x1, +0x64, +0x0, +0x64, +0x0, +0x1, +0x1, +0x22, +0x2, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x27, +0x4d, +0x4f, +0x55, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xf, +0x13, +0x14, +0x9, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x8, +0xa, +0x5, +0x22, +0x0, +0x10, +0x79, +0x0, +0x5b, +0x82, +0x4a, +0x4, +0x46, +0x44, +0x43, +0x30, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x7, +0x0, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x46, +0x44, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x1b, +0xa, +0x18, +0x47, +0x1, +0xf2, +0x3, +0xf2, +0x3, +0x0, +0x4, +0x47, +0x1, +0xf7, +0x3, +0xf7, +0x3, +0x0, +0x1, +0x22, +0x40, +0x0, +0x2a, +0x4, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x3e, +0x4c, +0x50, +0x54, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x4, +0x0, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x4c, +0x50, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0x78, +0x3, +0x78, +0x3, +0x8, +0x8, +0x22, +0x80, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x45, +0x4, +0x43, +0x4f, +0x4d, +0x31, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x5, +0x1, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x43, +0x41, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0xf8, +0x3, +0xf8, +0x3, +0x0, +0x8, +0x22, +0x10, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x46, +0x4, +0x43, +0x4f, +0x4d, +0x32, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x5, +0x1, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x2, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x43, +0x42, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0xf8, +0x2, +0xf8, +0x2, +0x0, +0x8, +0x22, +0x8, +0x0, +0x79, +0x0, +0x10, +0x48, +0x8, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x80, +0x50, +0x43, +0x53, +0x54, +0x1, +0xb, +0x0, +0xae, +0xa, +0x8, +0x5b, +0x81, +0x10, +0x50, +0x43, +0x53, +0x54, +0x43, +0x50, +0x43, +0x49, +0x55, +0x20, +0x50, +0x43, +0x49, +0x44, +0x20, +0x5b, +0x80, +0x53, +0x45, +0x4a, +0x5f, +0x1, +0xb, +0x8, +0xae, +0xa, +0x4, +0x5b, +0x81, +0xb, +0x53, +0x45, +0x4a, +0x5f, +0x43, +0x42, +0x30, +0x45, +0x4a, +0x20, +0x5b, +0x80, +0x42, +0x4e, +0x4d, +0x52, +0x1, +0xb, +0x10, +0xae, +0xa, +0x4, +0x5b, +0x81, +0xb, +0x42, +0x4e, +0x4d, +0x52, +0x43, +0x42, +0x4e, +0x55, +0x4d, +0x20, +0x5b, +0x1, +0x42, +0x4c, +0x43, +0x4b, +0x0, +0x14, +0x25, +0x50, +0x43, +0x45, +0x4a, +0x2, +0x5b, +0x23, +0x42, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x68, +0x42, +0x4e, +0x55, +0x4d, +0x70, +0x79, +0x1, +0x69, +0x0, +0x42, +0x30, +0x45, +0x4a, +0x5b, +0x27, +0x42, +0x4c, +0x43, +0x4b, +0xa4, +0x0, +0x10, +0x4e, +0x36, +0x5f, +0x53, +0x42, +0x5f, +0x10, +0x4b, +0xa, +0x50, +0x43, +0x49, +0x30, +0x14, +0x44, +0xa, +0x5f, +0x50, +0x52, +0x54, +0x0, +0x70, +0x12, +0x2, +0x80, +0x60, +0x70, +0x0, +0x61, +0xa2, +0x42, +0x9, +0x95, +0x61, +0xa, +0x80, +0x70, +0x7a, +0x61, +0xa, +0x2, +0x0, +0x62, +0x70, +0x7b, +0x72, +0x61, +0x62, +0x0, +0xa, +0x3, +0x0, +0x63, +0xa0, +0x10, +0x93, +0x63, +0x0, +0x70, +0x12, +0x9, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x64, +0xa0, +0x24, +0x93, +0x63, +0x1, +0xa0, +0x11, +0x93, +0x61, +0xa, +0x4, +0x70, +0x12, +0x9, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x53, +0x0, +0x64, +0xa1, +0xd, +0x70, +0x12, +0x9, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x64, +0xa0, +0x11, +0x93, +0x63, +0xa, +0x2, +0x70, +0x12, +0x9, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x64, +0xa0, +0x11, +0x93, +0x63, +0xa, +0x3, +0x70, +0x12, +0x9, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x64, +0x70, +0x7d, +0x79, +0x62, +0xa, +0x10, +0x0, +0xb, +0xff, +0xff, +0x0, +0x88, +0x64, +0x0, +0x0, +0x70, +0x7b, +0x61, +0xa, +0x3, +0x0, +0x88, +0x64, +0x1, +0x0, +0x70, +0x64, +0x88, +0x60, +0x61, +0x0, +0x75, +0x61, +0xa4, +0x60, +0x5b, +0x81, +0x24, +0x2f, +0x3, +0x50, +0x43, +0x49, +0x30, +0x49, +0x53, +0x41, +0x5f, +0x50, +0x34, +0x30, +0x43, +0x1, +0x50, +0x52, +0x51, +0x30, +0x8, +0x50, +0x52, +0x51, +0x31, +0x8, +0x50, +0x52, +0x51, +0x32, +0x8, +0x50, +0x52, +0x51, +0x33, +0x8, +0x14, +0x13, +0x49, +0x51, +0x53, +0x54, +0x1, +0xa0, +0x9, +0x7b, +0xa, +0x80, +0x68, +0x0, +0xa4, +0xa, +0x9, +0xa4, +0xa, +0xb, +0x14, +0x36, +0x49, +0x51, +0x43, +0x52, +0x9, +0x8, +0x50, +0x52, +0x52, +0x30, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x0, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8a, +0x50, +0x52, +0x52, +0x30, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0xa0, +0xb, +0x95, +0x68, +0xa, +0x80, +0x70, +0x68, +0x50, +0x52, +0x52, +0x49, +0xa4, +0x50, +0x52, +0x52, +0x30, +0x5b, +0x82, +0x4c, +0x7, +0x4c, +0x4e, +0x4b, +0x41, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x30, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x30, +0xa, +0x80, +0x50, +0x52, +0x51, +0x30, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x30, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x30, +0x5b, +0x82, +0x4c, +0x7, +0x4c, +0x4e, +0x4b, +0x42, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x31, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x31, +0xa, +0x80, +0x50, +0x52, +0x51, +0x31, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x31, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x31, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x43, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x2, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x32, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x32, +0xa, +0x80, +0x50, +0x52, +0x51, +0x32, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x32, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x32, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x3, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x33, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x33, +0xa, +0x80, +0x50, +0x52, +0x51, +0x33, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x33, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x33, +0x5b, +0x82, +0x4f, +0x4, +0x4c, +0x4e, +0x4b, +0x53, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x4, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x9, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x9, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0xa, +0xb, +0x14, +0x6, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x14, +0xb, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x5f, +0x50, +0x52, +0x53, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x10, +0x4d, +0xc, +0x5f, +0x53, +0x42, +0x5f, +0x14, +0x35, +0x43, +0x50, +0x4d, +0x41, +0x1, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x68, +0x0, +0x60, +0x70, +0x11, +0xb, +0xa, +0x8, +0x0, +0x8, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x61, +0x70, +0x68, +0x88, +0x61, +0xa, +0x2, +0x0, +0x70, +0x68, +0x88, +0x61, +0xa, +0x3, +0x0, +0x70, +0x60, +0x88, +0x61, +0xa, +0x4, +0x0, +0xa4, +0x61, +0x14, +0x1a, +0x43, +0x50, +0x53, +0x54, +0x1, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x68, +0x0, +0x60, +0xa0, +0x5, +0x60, +0xa4, +0xa, +0xf, +0xa1, +0x3, +0xa4, +0x0, +0x14, +0xa, +0x43, +0x50, +0x45, +0x4a, +0x2, +0x5b, +0x22, +0xa, +0xc8, +0x14, +0x4a, +0x6, +0x50, +0x52, +0x53, +0x43, +0x0, +0x70, +0x50, +0x52, +0x53, +0x5f, +0x65, +0x70, +0x0, +0x62, +0x70, +0x0, +0x60, +0xa2, +0x46, +0x5, +0x95, +0x60, +0x87, +0x43, +0x50, +0x4f, +0x4e, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x60, +0x0, +0x61, +0xa0, +0xa, +0x7b, +0x60, +0xa, +0x7, +0x0, +0x7a, +0x62, +0x1, +0x62, +0xa1, +0xc, +0x70, +0x83, +0x88, +0x65, +0x7a, +0x60, +0xa, +0x3, +0x0, +0x0, +0x62, +0x70, +0x7b, +0x62, +0x1, +0x0, +0x63, +0xa0, +0x22, +0x92, +0x93, +0x61, +0x63, +0x70, +0x63, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x60, +0x0, +0xa0, +0xa, +0x93, +0x63, +0x1, +0x4e, +0x54, +0x46, +0x59, +0x60, +0x1, +0xa1, +0x8, +0x4e, +0x54, +0x46, +0x59, +0x60, +0xa, +0x3, +0x75, +0x60, +0x10, +0x44, +0x2a, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x47, +0x29, +0x4d, +0x48, +0x50, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x50, +0x4e, +0x50, +0x30, +0x41, +0x30, +0x36, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x4d, +0x65, +0x6d, +0x6f, +0x72, +0x79, +0x20, +0x68, +0x6f, +0x74, +0x70, +0x6c, +0x75, +0x67, +0x20, +0x72, +0x65, +0x73, +0x6f, +0x75, +0x72, +0x63, +0x65, +0x73, +0x0, +0x14, +0x13, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0xa4, +0xa, +0xb, +0x5b, +0x1, +0x4d, +0x4c, +0x43, +0x4b, +0x0, +0x14, +0x4a, +0x4, +0x4d, +0x53, +0x43, +0x4e, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0xa2, +0x25, +0x95, +0x60, +0x4d, +0x44, +0x4e, +0x52, +0x70, +0x60, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0x13, +0x93, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x4d, +0x54, +0x46, +0x59, +0x60, +0x1, +0x70, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x72, +0x60, +0x1, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x1, +0x14, +0x2d, +0x4d, +0x52, +0x53, +0x54, +0x1, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0xb, +0x93, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x70, +0xa, +0xf, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x41, +0x18, +0x4d, +0x43, +0x52, +0x53, +0x9, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x8, +0x4d, +0x52, +0x36, +0x34, +0x11, +0x33, +0xa, +0x30, +0x8a, +0x2b, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0xe, +0x4d, +0x49, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x12, +0x4d, +0x49, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x26, +0x4c, +0x45, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x2a, +0x4c, +0x45, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x16, +0x4d, +0x41, +0x58, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x1a, +0x4d, +0x41, +0x58, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x48, +0x4d, +0x49, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x70, +0x4d, +0x52, +0x4c, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x4c, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x4d, +0x41, +0x58, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x14, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x72, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x11, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x74, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0x74, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x4d, +0x41, +0x58, +0x4c, +0xa0, +0x44, +0x7, +0x93, +0x4d, +0x41, +0x58, +0x48, +0x0, +0x8, +0x4d, +0x52, +0x33, +0x32, +0x11, +0x1f, +0xa, +0x1c, +0x87, +0x17, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xa, +0x4d, +0x49, +0x4e, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xe, +0x4d, +0x41, +0x58, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0x16, +0x4c, +0x45, +0x4e, +0x5f, +0x70, +0x4d, +0x49, +0x4e, +0x4c, +0x4d, +0x49, +0x4e, +0x5f, +0x70, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x41, +0x58, +0x5f, +0x70, +0x4c, +0x45, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x5f, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x33, +0x32, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x36, +0x34, +0x14, +0x24, +0x4d, +0x50, +0x58, +0x4d, +0x1, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x4d, +0x50, +0x58, +0x5f, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x28, +0x4d, +0x4f, +0x53, +0x54, +0x4, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x69, +0x4d, +0x4f, +0x45, +0x56, +0x70, +0x6a, +0x4d, +0x4f, +0x53, +0x43, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0x10, +0x45, +0xd, +0x5f, +0x47, +0x50, +0x45, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x41, +0x43, +0x50, +0x49, +0x30, +0x30, +0x30, +0x36, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x30, +0x0, +0x14, +0x39, +0x5f, +0x45, +0x30, +0x31, +0x0, +0x5b, +0x23, +0x5c, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x42, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x5c, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x50, +0x43, +0x4e, +0x54, +0x5b, +0x27, +0x5c, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x42, +0x4c, +0x43, +0x4b, +0x14, +0x10, +0x5f, +0x45, +0x30, +0x32, +0x0, +0x5c, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x52, +0x53, +0x43, +0x14, +0x19, +0x5f, +0x45, +0x30, +0x33, +0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x34, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x35, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x36, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x37, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x38, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x39, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x41, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x42, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x43, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x44, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x45, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x46, +0x0 +}; diff --git a/qemu/hw/i386/intel_iommu.c b/qemu/hw/i386/intel_iommu.c new file mode 100644 index 000000000..08055a8d8 --- /dev/null +++ b/qemu/hw/i386/intel_iommu.c @@ -0,0 +1,1967 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> + * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/sysbus.h" +#include "exec/address-spaces.h" +#include "intel_iommu_internal.h" + +/*#define DEBUG_INTEL_IOMMU*/ +#ifdef DEBUG_INTEL_IOMMU +enum { + DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, + DEBUG_CACHE, +}; +#define VTD_DBGBIT(x) (1 << DEBUG_##x) +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); + +#define VTD_DPRINTF(what, fmt, ...) do { \ + if (vtd_dbgflags & VTD_DBGBIT(what)) { \ + fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ + ## __VA_ARGS__); } \ + } while (0) +#else +#define VTD_DPRINTF(what, fmt, ...) do {} while (0) +#endif + +static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, + uint64_t wmask, uint64_t w1cmask) +{ + stq_le_p(&s->csr[addr], val); + stq_le_p(&s->wmask[addr], wmask); + stq_le_p(&s->w1cmask[addr], w1cmask); +} + +static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask) +{ + stq_le_p(&s->womask[addr], mask); +} + +static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, + uint32_t wmask, uint32_t w1cmask) +{ + stl_le_p(&s->csr[addr], val); + stl_le_p(&s->wmask[addr], wmask); + stl_le_p(&s->w1cmask[addr], w1cmask); +} + +static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask) +{ + stl_le_p(&s->womask[addr], mask); +} + +/* "External" get/set operations */ +static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ + uint64_t oldval = ldq_le_p(&s->csr[addr]); + uint64_t wmask = ldq_le_p(&s->wmask[addr]); + uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); + stq_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) +{ + uint32_t oldval = ldl_le_p(&s->csr[addr]); + uint32_t wmask = ldl_le_p(&s->wmask[addr]); + uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); + stl_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr) +{ + uint64_t val = ldq_le_p(&s->csr[addr]); + uint64_t womask = ldq_le_p(&s->womask[addr]); + return val & ~womask; +} + +static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr) +{ + uint32_t val = ldl_le_p(&s->csr[addr]); + uint32_t womask = ldl_le_p(&s->womask[addr]); + return val & ~womask; +} + +/* "Internal" get/set operations */ +static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr) +{ + return ldq_le_p(&s->csr[addr]); +} + +static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr) +{ + return ldl_le_p(&s->csr[addr]); +} + +static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ + stq_le_p(&s->csr[addr], val); +} + +static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr, + uint32_t clear, uint32_t mask) +{ + uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask; + stl_le_p(&s->csr[addr], new_val); + return new_val; +} + +static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, + uint64_t clear, uint64_t mask) +{ + uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask; + stq_le_p(&s->csr[addr], new_val); + return new_val; +} + +/* GHashTable functions */ +static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) +{ + return *((const uint64_t *)v1) == *((const uint64_t *)v2); +} + +static guint vtd_uint64_hash(gconstpointer v) +{ + return (guint)*(const uint64_t *)v; +} + +static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + uint16_t domain_id = *(uint16_t *)user_data; + return entry->domain_id == domain_id; +} + +static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + uint64_t gfn = info->gfn & info->mask; + return (entry->domain_id == info->domain_id) && + ((entry->gfn & info->mask) == gfn); +} + +/* Reset all the gen of VTDAddressSpace to zero and set the gen of + * IntelIOMMUState to 1. + */ +static void vtd_reset_context_cache(IntelIOMMUState *s) +{ + VTDAddressSpace **pvtd_as; + VTDAddressSpace *vtd_as; + uint32_t bus_it; + uint32_t devfn_it; + + VTD_DPRINTF(CACHE, "global context_cache_gen=1"); + for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) { + pvtd_as = s->address_spaces[bus_it]; + if (!pvtd_as) { + continue; + } + for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + vtd_as = pvtd_as[devfn_it]; + if (!vtd_as) { + continue; + } + vtd_as->context_cache_entry.context_cache_gen = 0; + } + } + s->context_cache_gen = 1; +} + +static void vtd_reset_iotlb(IntelIOMMUState *s) +{ + assert(s->iotlb); + g_hash_table_remove_all(s->iotlb); +} + +static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, + hwaddr addr) +{ + uint64_t key; + + key = (addr >> VTD_PAGE_SHIFT_4K) | + ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); + return g_hash_table_lookup(s->iotlb, &key); + +} + +static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, + uint16_t domain_id, hwaddr addr, uint64_t slpte, + bool read_flags, bool write_flags) +{ + VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); + uint64_t *key = g_malloc(sizeof(*key)); + uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K; + + VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 + " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte, + domain_id); + if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { + VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset"); + vtd_reset_iotlb(s); + } + + entry->gfn = gfn; + entry->domain_id = domain_id; + entry->slpte = slpte; + entry->read_flags = read_flags; + entry->write_flags = write_flags; + *key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); + g_hash_table_replace(s->iotlb, key, entry); +} + +/* Given the reg addr of both the message data and address, generate an + * interrupt via MSI. + */ +static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg, + hwaddr mesg_data_reg) +{ + hwaddr addr; + uint32_t data; + + assert(mesg_data_reg < DMAR_REG_SIZE); + assert(mesg_addr_reg < DMAR_REG_SIZE); + + addr = vtd_get_long_raw(s, mesg_addr_reg); + data = vtd_get_long_raw(s, mesg_data_reg); + + VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data); + address_space_stl_le(&address_space_memory, addr, data, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +/* Generate a fault event to software via MSI if conditions are met. + * Notice that the value of FSTS_REG being passed to it should be the one + * before any update. + */ +static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts) +{ + if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO || + pre_fsts & VTD_FSTS_IQE) { + VTD_DPRINTF(FLOG, "there are previous interrupt conditions " + "to be serviced by software, fault event is not generated " + "(FSTS_REG 0x%"PRIx32 ")", pre_fsts); + return; + } + vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP); + if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) { + VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated"); + } else { + vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); + vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); + } +} + +/* Check if the Fault (F) field of the Fault Recording Register referenced by + * @index is Set. + */ +static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index) +{ + /* Each reg is 128-bit */ + hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); + addr += 8; /* Access the high 64-bit half */ + + assert(index < DMAR_FRCD_REG_NR); + + return vtd_get_quad_raw(s, addr) & VTD_FRCD_F; +} + +/* Update the PPF field of Fault Status Register. + * Should be called whenever change the F field of any fault recording + * registers. + */ +static void vtd_update_fsts_ppf(IntelIOMMUState *s) +{ + uint32_t i; + uint32_t ppf_mask = 0; + + for (i = 0; i < DMAR_FRCD_REG_NR; i++) { + if (vtd_is_frcd_set(s, i)) { + ppf_mask = VTD_FSTS_PPF; + break; + } + } + vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask); + VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0); +} + +static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) +{ + /* Each reg is 128-bit */ + hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); + addr += 8; /* Access the high 64-bit half */ + + assert(index < DMAR_FRCD_REG_NR); + + vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F); + vtd_update_fsts_ppf(s); +} + +/* Must not update F field now, should be done later */ +static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, + uint16_t source_id, hwaddr addr, + VTDFaultReason fault, bool is_write) +{ + uint64_t hi = 0, lo; + hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); + + assert(index < DMAR_FRCD_REG_NR); + + lo = VTD_FRCD_FI(addr); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); + if (!is_write) { + hi |= VTD_FRCD_T; + } + vtd_set_quad_raw(s, frcd_reg_addr, lo); + vtd_set_quad_raw(s, frcd_reg_addr + 8, hi); + VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64 + ", lo 0x%"PRIx64, index, hi, lo); +} + +/* Try to collapse multiple pending faults from the same requester */ +static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) +{ + uint32_t i; + uint64_t frcd_reg; + hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */ + + for (i = 0; i < DMAR_FRCD_REG_NR; i++) { + frcd_reg = vtd_get_quad_raw(s, addr); + VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg); + if ((frcd_reg & VTD_FRCD_F) && + ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) { + return true; + } + addr += 16; /* 128-bit for each */ + } + return false; +} + +/* Log and report an DMAR (address translation) fault to software */ +static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, + hwaddr addr, VTDFaultReason fault, + bool is_write) +{ + uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); + + assert(fault < VTD_FR_MAX); + + if (fault == VTD_FR_RESERVED_ERR) { + /* This is not a normal fault reason case. Drop it. */ + return; + } + VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64 + ", is_write %d", source_id, fault, addr, is_write); + if (fsts_reg & VTD_FSTS_PFO) { + VTD_DPRINTF(FLOG, "new fault is not recorded due to " + "Primary Fault Overflow"); + return; + } + if (vtd_try_collapse_fault(s, source_id)) { + VTD_DPRINTF(FLOG, "new fault is not recorded due to " + "compression of faults"); + return; + } + if (vtd_is_frcd_set(s, s->next_frcd_reg)) { + VTD_DPRINTF(FLOG, "Primary Fault Overflow and " + "new fault is not recorded, set PFO field"); + vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO); + return; + } + + vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); + + if (fsts_reg & VTD_FSTS_PPF) { + VTD_DPRINTF(FLOG, "there are pending faults already, " + "fault event is not generated"); + vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); + s->next_frcd_reg++; + if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { + s->next_frcd_reg = 0; + } + } else { + vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK, + VTD_FSTS_FRI(s->next_frcd_reg)); + vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */ + s->next_frcd_reg++; + if (s->next_frcd_reg == DMAR_FRCD_REG_NR) { + s->next_frcd_reg = 0; + } + /* This case actually cause the PPF to be Set. + * So generate fault event (interrupt). + */ + vtd_generate_fault_event(s, fsts_reg); + } +} + +/* Handle Invalidation Queue Errors of queued invalidation interface error + * conditions. + */ +static void vtd_handle_inv_queue_error(IntelIOMMUState *s) +{ + uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); + + vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE); + vtd_generate_fault_event(s, fsts_reg); +} + +/* Set the IWC field and try to generate an invalidation completion interrupt */ +static void vtd_generate_completion_event(IntelIOMMUState *s) +{ + VTD_DPRINTF(INV, "completes an invalidation wait command with " + "Interrupt Flag"); + if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) { + VTD_DPRINTF(INV, "there is a previous interrupt condition to be " + "serviced by software, " + "new invalidation event is not generated"); + return; + } + vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC); + vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP); + if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) { + VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation " + "event is not generated"); + return; + } else { + /* Generate the interrupt event */ + vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); + vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); + } +} + +static inline bool vtd_root_entry_present(VTDRootEntry *root) +{ + return root->val & VTD_ROOT_ENTRY_P; +} + +static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, + VTDRootEntry *re) +{ + dma_addr_t addr; + + addr = s->root + index * sizeof(*re); + if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { + VTD_DPRINTF(GENERAL, "error: fail to access root-entry at 0x%"PRIx64 + " + %"PRIu8, s->root, index); + re->val = 0; + return -VTD_FR_ROOT_TABLE_INV; + } + re->val = le64_to_cpu(re->val); + return 0; +} + +static inline bool vtd_context_entry_present(VTDContextEntry *context) +{ + return context->lo & VTD_CONTEXT_ENTRY_P; +} + +static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, + VTDContextEntry *ce) +{ + dma_addr_t addr; + + if (!vtd_root_entry_present(root)) { + VTD_DPRINTF(GENERAL, "error: root-entry is not present"); + return -VTD_FR_ROOT_ENTRY_P; + } + addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); + if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { + VTD_DPRINTF(GENERAL, "error: fail to access context-entry at 0x%"PRIx64 + " + %"PRIu8, + (uint64_t)(root->val & VTD_ROOT_ENTRY_CTP), index); + return -VTD_FR_CONTEXT_TABLE_INV; + } + ce->lo = le64_to_cpu(ce->lo); + ce->hi = le64_to_cpu(ce->hi); + return 0; +} + +static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce) +{ + return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; +} + +/* The shift of an addr for a certain level of paging structure */ +static inline uint32_t vtd_slpt_level_shift(uint32_t level) +{ + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; +} + +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) +{ + return slpte & VTD_SL_PT_BASE_ADDR_MASK; +} + +/* Whether the pte indicates the address of the page frame */ +static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) +{ + return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); +} + +/* Get the content of a spte located in @base_addr[@index] */ +static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) +{ + uint64_t slpte; + + assert(index < VTD_SL_PT_ENTRY_NR); + + if (dma_memory_read(&address_space_memory, + base_addr + index * sizeof(slpte), &slpte, + sizeof(slpte))) { + slpte = (uint64_t)-1; + return slpte; + } + slpte = le64_to_cpu(slpte); + return slpte; +} + +/* Given a gpa and the level of paging structure, return the offset of current + * level. + */ +static inline uint32_t vtd_gpa_level_offset(uint64_t gpa, uint32_t level) +{ + return (gpa >> vtd_slpt_level_shift(level)) & + ((1ULL << VTD_SL_LEVEL_BITS) - 1); +} + +/* Check Capability Register to see if the @level of page-table is supported */ +static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) +{ + return VTD_CAP_SAGAW_MASK & s->cap & + (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); +} + +/* Get the page-table level that hardware should use for the second-level + * page-table walk from the Address Width field of context-entry. + */ +static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce) +{ + return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW); +} + +static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) +{ + return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; +} + +static const uint64_t vtd_paging_entry_rsvd_field[] = { + [0] = ~0ULL, + /* For not large page */ + [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + /* For large page */ + [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), + [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), +}; + +static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) +{ + if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) { + /* Maybe large page */ + return slpte & vtd_paging_entry_rsvd_field[level + 4]; + } else { + return slpte & vtd_paging_entry_rsvd_field[level]; + } +} + +/* Given the @gpa, get relevant @slptep. @slpte_level will be the last level + * of the translation, can be used for deciding the size of large page. + */ +static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_write, + uint64_t *slptep, uint32_t *slpte_level, + bool *reads, bool *writes) +{ + dma_addr_t addr = vtd_get_slpt_base_from_context(ce); + uint32_t level = vtd_get_level_from_context_entry(ce); + uint32_t offset; + uint64_t slpte; + uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + uint64_t access_right_check; + + /* Check if @gpa is above 2^X-1, where X is the minimum of MGAW in CAP_REG + * and AW in context-entry. + */ + if (gpa & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { + VTD_DPRINTF(GENERAL, "error: gpa 0x%"PRIx64 " exceeds limits", gpa); + return -VTD_FR_ADDR_BEYOND_MGAW; + } + + /* FIXME: what is the Atomics request here? */ + access_right_check = is_write ? VTD_SL_W : VTD_SL_R; + + while (true) { + offset = vtd_gpa_level_offset(gpa, level); + slpte = vtd_get_slpte(addr, offset); + + if (slpte == (uint64_t)-1) { + VTD_DPRINTF(GENERAL, "error: fail to access second-level paging " + "entry at level %"PRIu32 " for gpa 0x%"PRIx64, + level, gpa); + if (level == vtd_get_level_from_context_entry(ce)) { + /* Invalid programming of context-entry */ + return -VTD_FR_CONTEXT_ENTRY_INV; + } else { + return -VTD_FR_PAGING_ENTRY_INV; + } + } + *reads = (*reads) && (slpte & VTD_SL_R); + *writes = (*writes) && (slpte & VTD_SL_W); + if (!(slpte & access_right_check)) { + VTD_DPRINTF(GENERAL, "error: lack of %s permission for " + "gpa 0x%"PRIx64 " slpte 0x%"PRIx64, + (is_write ? "write" : "read"), gpa, slpte); + return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; + } + if (vtd_slpte_nonzero_rsvd(slpte, level)) { + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second " + "level paging entry level %"PRIu32 " slpte 0x%"PRIx64, + level, slpte); + return -VTD_FR_PAGING_ENTRY_RSVD; + } + + if (vtd_is_last_slpte(slpte, level)) { + *slptep = slpte; + *slpte_level = level; + return 0; + } + addr = vtd_get_slpte_addr(slpte); + level--; + } +} + +/* Map a device to its corresponding domain (context-entry) */ +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, + uint8_t devfn, VTDContextEntry *ce) +{ + VTDRootEntry re; + int ret_fr; + + ret_fr = vtd_get_root_entry(s, bus_num, &re); + if (ret_fr) { + return ret_fr; + } + + if (!vtd_root_entry_present(&re)) { + VTD_DPRINTF(GENERAL, "error: root-entry #%"PRIu8 " is not present", + bus_num); + return -VTD_FR_ROOT_ENTRY_P; + } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in root-entry " + "hi 0x%"PRIx64 " lo 0x%"PRIx64, re.rsvd, re.val); + return -VTD_FR_ROOT_ENTRY_RSVD; + } + + ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce); + if (ret_fr) { + return ret_fr; + } + + if (!vtd_context_entry_present(ce)) { + VTD_DPRINTF(GENERAL, + "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") " + "is not present", devfn, bus_num); + return -VTD_FR_CONTEXT_ENTRY_P; + } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + VTD_DPRINTF(GENERAL, + "error: non-zero reserved field in context-entry " + "hi 0x%"PRIx64 " lo 0x%"PRIx64, ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_RSVD; + } + /* Check if the programming of context-entry is valid */ + if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) { + VTD_DPRINTF(GENERAL, "error: unsupported Address Width value in " + "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, + ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_INV; + } else if (ce->lo & VTD_CONTEXT_ENTRY_TT) { + VTD_DPRINTF(GENERAL, "error: unsupported Translation Type in " + "context-entry hi 0x%"PRIx64 " lo 0x%"PRIx64, + ce->hi, ce->lo); + return -VTD_FR_CONTEXT_ENTRY_INV; + } + return 0; +} + +static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) +{ + return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); +} + +static const bool vtd_qualified_faults[] = { + [VTD_FR_RESERVED] = false, + [VTD_FR_ROOT_ENTRY_P] = false, + [VTD_FR_CONTEXT_ENTRY_P] = true, + [VTD_FR_CONTEXT_ENTRY_INV] = true, + [VTD_FR_ADDR_BEYOND_MGAW] = true, + [VTD_FR_WRITE] = true, + [VTD_FR_READ] = true, + [VTD_FR_PAGING_ENTRY_INV] = true, + [VTD_FR_ROOT_TABLE_INV] = false, + [VTD_FR_CONTEXT_TABLE_INV] = false, + [VTD_FR_ROOT_ENTRY_RSVD] = false, + [VTD_FR_PAGING_ENTRY_RSVD] = true, + [VTD_FR_CONTEXT_ENTRY_TT] = true, + [VTD_FR_RESERVED_ERR] = false, + [VTD_FR_MAX] = false, +}; + +/* To see if a fault condition is "qualified", which is reported to software + * only if the FPD field in the context-entry used to process the faulting + * request is 0. + */ +static inline bool vtd_is_qualified_fault(VTDFaultReason fault) +{ + return vtd_qualified_faults[fault]; +} + +static inline bool vtd_is_interrupt_addr(hwaddr addr) +{ + return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; +} + +/* Map dev to context-entry then do a paging-structures walk to do a iommu + * translation. + * + * Called from RCU critical section. + * + * @bus_num: The bus number + * @devfn: The devfn, which is the combined of device and function number + * @is_write: The access is a write operation + * @entry: IOMMUTLBEntry that contain the addr to be translated and result + */ +static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num, + uint8_t devfn, hwaddr addr, bool is_write, + IOMMUTLBEntry *entry) +{ + IntelIOMMUState *s = vtd_as->iommu_state; + VTDContextEntry ce; + VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry; + uint64_t slpte; + uint32_t level; + uint16_t source_id = vtd_make_source_id(bus_num, devfn); + int ret_fr; + bool is_fpd_set = false; + bool reads = true; + bool writes = true; + VTDIOTLBEntry *iotlb_entry; + + /* Check if the request is in interrupt address range */ + if (vtd_is_interrupt_addr(addr)) { + if (is_write) { + /* FIXME: since we don't know the length of the access here, we + * treat Non-DWORD length write requests without PASID as + * interrupt requests, too. Withoud interrupt remapping support, + * we just use 1:1 mapping. + */ + VTD_DPRINTF(MMU, "write request to interrupt address " + "gpa 0x%"PRIx64, addr); + entry->iova = addr & VTD_PAGE_MASK_4K; + entry->translated_addr = addr & VTD_PAGE_MASK_4K; + entry->addr_mask = ~VTD_PAGE_MASK_4K; + entry->perm = IOMMU_WO; + return; + } else { + VTD_DPRINTF(GENERAL, "error: read request from interrupt address " + "gpa 0x%"PRIx64, addr); + vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_write); + return; + } + } + /* Try to fetch slpte form IOTLB */ + iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); + if (iotlb_entry) { + VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 + " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, + iotlb_entry->slpte, iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + reads = iotlb_entry->read_flags; + writes = iotlb_entry->write_flags; + goto out; + } + /* Try to fetch context-entry from cache first */ + if (cc_entry->context_cache_gen == s->context_cache_gen) { + VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d " + "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")", + bus_num, devfn, cc_entry->context_entry.hi, + cc_entry->context_entry.lo, cc_entry->context_cache_gen); + ce = cc_entry->context_entry; + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; + } else { + ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; + if (ret_fr) { + ret_fr = -ret_fr; + if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { + VTD_DPRINTF(FLOG, "fault processing is disabled for DMA " + "requests through this context-entry " + "(with FPD Set)"); + } else { + vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); + } + return; + } + /* Update context-cache */ + VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d " + "(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 "->%"PRIu32 ")", + bus_num, devfn, ce.hi, ce.lo, + cc_entry->context_cache_gen, s->context_cache_gen); + cc_entry->context_entry = ce; + cc_entry->context_cache_gen = s->context_cache_gen; + } + + ret_fr = vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level, + &reads, &writes); + if (ret_fr) { + ret_fr = -ret_fr; + if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { + VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests " + "through this context-entry (with FPD Set)"); + } else { + vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); + } + return; + } + + vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte, + reads, writes); +out: + entry->iova = addr & VTD_PAGE_MASK_4K; + entry->translated_addr = vtd_get_slpte_addr(slpte) & VTD_PAGE_MASK_4K; + entry->addr_mask = ~VTD_PAGE_MASK_4K; + entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0); +} + +static void vtd_root_table_setup(IntelIOMMUState *s) +{ + s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); + s->root_extended = s->root & VTD_RTADDR_RTT; + s->root &= VTD_RTADDR_ADDR_MASK; + + VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root, + (s->root_extended ? "(extended)" : "")); +} + +static void vtd_context_global_invalidate(IntelIOMMUState *s) +{ + s->context_cache_gen++; + if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { + vtd_reset_context_cache(s); + } +} + +/* Do a context-cache device-selective invalidation. + * @func_mask: FM field after shifting + */ +static void vtd_context_device_invalidate(IntelIOMMUState *s, + uint16_t source_id, + uint16_t func_mask) +{ + uint16_t mask; + VTDAddressSpace **pvtd_as; + VTDAddressSpace *vtd_as; + uint16_t devfn; + uint16_t devfn_it; + + switch (func_mask & 3) { + case 0: + mask = 0; /* No bits in the SID field masked */ + break; + case 1: + mask = 4; /* Mask bit 2 in the SID field */ + break; + case 2: + mask = 6; /* Mask bit 2:1 in the SID field */ + break; + case 3: + mask = 7; /* Mask bit 2:0 in the SID field */ + break; + } + VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16 + " mask %"PRIu16, source_id, mask); + pvtd_as = s->address_spaces[VTD_SID_TO_BUS(source_id)]; + if (pvtd_as) { + devfn = VTD_SID_TO_DEVFN(source_id); + for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { + vtd_as = pvtd_as[devfn_it]; + if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { + VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16, + devfn_it); + vtd_as->context_cache_entry.context_cache_gen = 0; + } + } + } +} + +/* Context-cache invalidation + * Returns the Context Actual Invalidation Granularity. + * @val: the content of the CCMD_REG + */ +static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val) +{ + uint64_t caig; + uint64_t type = val & VTD_CCMD_CIRG_MASK; + + switch (type) { + case VTD_CCMD_DOMAIN_INVL: + VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, + (uint16_t)VTD_CCMD_DID(val)); + /* Fall through */ + case VTD_CCMD_GLOBAL_INVL: + VTD_DPRINTF(INV, "global invalidation"); + caig = VTD_CCMD_GLOBAL_INVL_A; + vtd_context_global_invalidate(s); + break; + + case VTD_CCMD_DEVICE_INVL: + caig = VTD_CCMD_DEVICE_INVL_A; + vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val)); + break; + + default: + VTD_DPRINTF(GENERAL, "error: invalid granularity"); + caig = 0; + } + return caig; +} + +static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) +{ + vtd_reset_iotlb(s); +} + +static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) +{ + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, + &domain_id); +} + +static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, + hwaddr addr, uint8_t am) +{ + VTDIOTLBPageInvInfo info; + + assert(am <= VTD_MAMV); + info.domain_id = domain_id; + info.gfn = addr >> VTD_PAGE_SHIFT_4K; + info.mask = ~((1 << am) - 1); + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); +} + +/* Flush IOTLB + * Returns the IOTLB Actual Invalidation Granularity. + * @val: the content of the IOTLB_REG + */ +static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) +{ + uint64_t iaig; + uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK; + uint16_t domain_id; + hwaddr addr; + uint8_t am; + + switch (type) { + case VTD_TLB_GLOBAL_FLUSH: + VTD_DPRINTF(INV, "global invalidation"); + iaig = VTD_TLB_GLOBAL_FLUSH_A; + vtd_iotlb_global_invalidate(s); + break; + + case VTD_TLB_DSI_FLUSH: + domain_id = VTD_TLB_DID(val); + VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, + domain_id); + iaig = VTD_TLB_DSI_FLUSH_A; + vtd_iotlb_domain_invalidate(s, domain_id); + break; + + case VTD_TLB_PSI_FLUSH: + domain_id = VTD_TLB_DID(val); + addr = vtd_get_quad_raw(s, DMAR_IVA_REG); + am = VTD_IVA_AM(addr); + addr = VTD_IVA_ADDR(addr); + VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 + " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); + if (am > VTD_MAMV) { + VTD_DPRINTF(GENERAL, "error: supported max address mask value is " + "%"PRIu8, (uint8_t)VTD_MAMV); + iaig = 0; + break; + } + iaig = VTD_TLB_PSI_FLUSH_A; + vtd_iotlb_page_invalidate(s, domain_id, addr, am); + break; + + default: + VTD_DPRINTF(GENERAL, "error: invalid granularity"); + iaig = 0; + } + return iaig; +} + +static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s) +{ + return s->iq_tail == 0; +} + +static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s) +{ + return s->qi_enabled && (s->iq_tail == s->iq_head) && + (s->iq_last_desc_type == VTD_INV_DESC_WAIT); +} + +static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) +{ + uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); + + VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off")); + if (en) { + if (vtd_queued_inv_enable_check(s)) { + s->iq = iqa_val & VTD_IQA_IQA_MASK; + /* 2^(x+8) entries */ + s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); + s->qi_enabled = true; + VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); + VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", + s->iq, s->iq_size); + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); + } else { + VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " + "tail %"PRIu16, s->iq_tail); + } + } else { + if (vtd_queued_inv_disable_check(s)) { + /* disable Queued Invalidation */ + vtd_set_quad_raw(s, DMAR_IQH_REG, 0); + s->iq_head = 0; + s->qi_enabled = false; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); + } else { + VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " + "head %"PRIu16 ", tail %"PRIu16 + ", last_descriptor %"PRIu8, + s->iq_head, s->iq_tail, s->iq_last_desc_type); + } + } +} + +/* Set Root Table Pointer */ +static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) +{ + VTD_DPRINTF(CSR, "set Root Table Pointer"); + + vtd_root_table_setup(s); + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS); +} + +/* Handle Translation Enable/Disable */ +static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) +{ + VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off")); + + if (en) { + s->dmar_enabled = true; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES); + } else { + s->dmar_enabled = false; + + /* Clear the index of Fault Recording Register */ + s->next_frcd_reg = 0; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); + } +} + +/* Handle write to Global Command Register */ +static void vtd_handle_gcmd_write(IntelIOMMUState *s) +{ + uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG); + uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG); + uint32_t changed = status ^ val; + + VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status); + if (changed & VTD_GCMD_TE) { + /* Translation enable/disable */ + vtd_handle_gcmd_te(s, val & VTD_GCMD_TE); + } + if (val & VTD_GCMD_SRTP) { + /* Set/update the root-table pointer */ + vtd_handle_gcmd_srtp(s); + } + if (changed & VTD_GCMD_QIE) { + /* Queued Invalidation Enable */ + vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); + } +} + +/* Handle write to Context Command Register */ +static void vtd_handle_ccmd_write(IntelIOMMUState *s) +{ + uint64_t ret; + uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG); + + /* Context-cache invalidation request */ + if (val & VTD_CCMD_ICC) { + if (s->qi_enabled) { + VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " + "should not use register-based invalidation"); + return; + } + ret = vtd_context_cache_invalidate(s, val); + /* Invalidation completed. Change something to show */ + vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL); + ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK, + ret); + VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret); + } +} + +/* Handle write to IOTLB Invalidation Register */ +static void vtd_handle_iotlb_write(IntelIOMMUState *s) +{ + uint64_t ret; + uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG); + + /* IOTLB invalidation request */ + if (val & VTD_TLB_IVT) { + if (s->qi_enabled) { + VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " + "should not use register-based invalidation"); + return; + } + ret = vtd_iotlb_flush(s, val); + /* Invalidation completed. Change something to show */ + vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL); + ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, + VTD_TLB_FLUSH_GRANU_MASK_A, ret); + VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret); + } +} + +/* Fetch an Invalidation Descriptor from the Invalidation Queue */ +static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset, + VTDInvDesc *inv_desc) +{ + dma_addr_t addr = base_addr + offset * sizeof(*inv_desc); + if (dma_memory_read(&address_space_memory, addr, inv_desc, + sizeof(*inv_desc))) { + VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor " + "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset); + inv_desc->lo = 0; + inv_desc->hi = 0; + + return false; + } + inv_desc->lo = le64_to_cpu(inv_desc->lo); + inv_desc->hi = le64_to_cpu(inv_desc->hi); + return true; +} + +static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) +{ + if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || + (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Invalidation " + "Wait Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc->hi, inv_desc->lo); + return false; + } + if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { + /* Status Write */ + uint32_t status_data = (uint32_t)(inv_desc->lo >> + VTD_INV_DESC_WAIT_DATA_SHIFT); + + assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF)); + + /* FIXME: need to be masked with HAW? */ + dma_addr_t status_addr = inv_desc->hi; + VTD_DPRINTF(INV, "status data 0x%x, status addr 0x%"PRIx64, + status_data, status_addr); + status_data = cpu_to_le32(status_data); + if (dma_memory_write(&address_space_memory, status_addr, &status_data, + sizeof(status_data))) { + VTD_DPRINTF(GENERAL, "error: fail to perform a coherent write"); + return false; + } + } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) { + /* Interrupt flag */ + VTD_DPRINTF(INV, "Invalidation Wait Descriptor interrupt completion"); + vtd_generate_completion_event(s); + } else { + VTD_DPRINTF(GENERAL, "error: invalid Invalidation Wait Descriptor: " + "hi 0x%"PRIx64 " lo 0x%"PRIx64, inv_desc->hi, inv_desc->lo); + return false; + } + return true; +} + +static bool vtd_process_context_cache_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Context-cache " + "Invalidate Descriptor"); + return false; + } + switch (inv_desc->lo & VTD_INV_DESC_CC_G) { + case VTD_INV_DESC_CC_DOMAIN: + VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, + (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo)); + /* Fall through */ + case VTD_INV_DESC_CC_GLOBAL: + VTD_DPRINTF(INV, "global invalidation"); + vtd_context_global_invalidate(s); + break; + + case VTD_INV_DESC_CC_DEVICE: + vtd_context_device_invalidate(s, VTD_INV_DESC_CC_SID(inv_desc->lo), + VTD_INV_DESC_CC_FM(inv_desc->lo)); + break; + + default: + VTD_DPRINTF(GENERAL, "error: invalid granularity in Context-cache " + "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc->hi, inv_desc->lo); + return false; + } + return true; +} + +static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) +{ + uint16_t domain_id; + uint8_t am; + hwaddr addr; + + if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || + (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { + VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB " + "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc->hi, inv_desc->lo); + return false; + } + + switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) { + case VTD_INV_DESC_IOTLB_GLOBAL: + VTD_DPRINTF(INV, "global invalidation"); + vtd_iotlb_global_invalidate(s); + break; + + case VTD_INV_DESC_IOTLB_DOMAIN: + domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); + VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16, + domain_id); + vtd_iotlb_domain_invalidate(s, domain_id); + break; + + case VTD_INV_DESC_IOTLB_PAGE: + domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo); + addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); + am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); + VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16 + " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am); + if (am > VTD_MAMV) { + VTD_DPRINTF(GENERAL, "error: supported max address mask value is " + "%"PRIu8, (uint8_t)VTD_MAMV); + return false; + } + vtd_iotlb_page_invalidate(s, domain_id, addr, am); + break; + + default: + VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate " + "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64, + inv_desc->hi, inv_desc->lo); + return false; + } + return true; +} + +static bool vtd_process_inv_desc(IntelIOMMUState *s) +{ + VTDInvDesc inv_desc; + uint8_t desc_type; + + VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head); + if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) { + s->iq_last_desc_type = VTD_INV_DESC_NONE; + return false; + } + desc_type = inv_desc.lo & VTD_INV_DESC_TYPE; + /* FIXME: should update at first or at last? */ + s->iq_last_desc_type = desc_type; + + switch (desc_type) { + case VTD_INV_DESC_CC: + VTD_DPRINTF(INV, "Context-cache Invalidate Descriptor hi 0x%"PRIx64 + " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); + if (!vtd_process_context_cache_desc(s, &inv_desc)) { + return false; + } + break; + + case VTD_INV_DESC_IOTLB: + VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64 + " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); + if (!vtd_process_iotlb_desc(s, &inv_desc)) { + return false; + } + break; + + case VTD_INV_DESC_WAIT: + VTD_DPRINTF(INV, "Invalidation Wait Descriptor hi 0x%"PRIx64 + " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); + if (!vtd_process_wait_desc(s, &inv_desc)) { + return false; + } + break; + + default: + VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " + "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, + inv_desc.hi, inv_desc.lo, desc_type); + return false; + } + s->iq_head++; + if (s->iq_head == s->iq_size) { + s->iq_head = 0; + } + return true; +} + +/* Try to fetch and process more Invalidation Descriptors */ +static void vtd_fetch_inv_desc(IntelIOMMUState *s) +{ + VTD_DPRINTF(INV, "fetch Invalidation Descriptors"); + if (s->iq_tail >= s->iq_size) { + /* Detects an invalid Tail pointer */ + VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16 + " while iq_size is %"PRIu16, s->iq_tail, s->iq_size); + vtd_handle_inv_queue_error(s); + return; + } + while (s->iq_head != s->iq_tail) { + if (!vtd_process_inv_desc(s)) { + /* Invalidation Queue Errors */ + vtd_handle_inv_queue_error(s); + break; + } + /* Must update the IQH_REG in time */ + vtd_set_quad_raw(s, DMAR_IQH_REG, + (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) & + VTD_IQH_QH_MASK); + } +} + +/* Handle write to Invalidation Queue Tail Register */ +static void vtd_handle_iqt_write(IntelIOMMUState *s) +{ + uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG); + + s->iq_tail = VTD_IQT_QT(val); + VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail); + if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) { + /* Process Invalidation Queue here */ + vtd_fetch_inv_desc(s); + } +} + +static void vtd_handle_fsts_write(IntelIOMMUState *s) +{ + uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); + uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); + uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE; + + if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) { + vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); + VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear " + "IP field of FECTL_REG"); + } + /* FIXME: when IQE is Clear, should we try to fetch some Invalidation + * Descriptors if there are any when Queued Invalidation is enabled? + */ +} + +static void vtd_handle_fectl_write(IntelIOMMUState *s) +{ + uint32_t fectl_reg; + /* FIXME: when software clears the IM field, check the IP field. But do we + * need to compare the old value and the new value to conclude that + * software clears the IM field? Or just check if the IM field is zero? + */ + fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG); + if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) { + vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG); + vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0); + VTD_DPRINTF(FLOG, "IM field is cleared, generate " + "fault event interrupt"); + } +} + +static void vtd_handle_ics_write(IntelIOMMUState *s) +{ + uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG); + uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); + + if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) { + vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); + VTD_DPRINTF(INV, "pending completion interrupt condition serviced, " + "clear IP field of IECTL_REG"); + } +} + +static void vtd_handle_iectl_write(IntelIOMMUState *s) +{ + uint32_t iectl_reg; + /* FIXME: when software clears the IM field, check the IP field. But do we + * need to compare the old value and the new value to conclude that + * software clears the IM field? Or just check if the IM field is zero? + */ + iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG); + if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) { + vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); + vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); + VTD_DPRINTF(INV, "IM field is cleared, generate " + "invalidation event interrupt"); + } +} + +static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size) +{ + IntelIOMMUState *s = opaque; + uint64_t val; + + if (addr + size > DMAR_REG_SIZE) { + VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 + ", got 0x%"PRIx64 " %d", + (uint64_t)DMAR_REG_SIZE, addr, size); + return (uint64_t)-1; + } + + switch (addr) { + /* Root Table Address Register, 64-bit */ + case DMAR_RTADDR_REG: + if (size == 4) { + val = s->root & ((1ULL << 32) - 1); + } else { + val = s->root; + } + break; + + case DMAR_RTADDR_REG_HI: + assert(size == 4); + val = s->root >> 32; + break; + + /* Invalidation Queue Address Register, 64-bit */ + case DMAR_IQA_REG: + val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS); + if (size == 4) { + val = val & ((1ULL << 32) - 1); + } + break; + + case DMAR_IQA_REG_HI: + assert(size == 4); + val = s->iq >> 32; + break; + + default: + if (size == 4) { + val = vtd_get_long(s, addr); + } else { + val = vtd_get_quad(s, addr); + } + } + VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64, + addr, size, val); + return val; +} + +static void vtd_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + IntelIOMMUState *s = opaque; + + if (addr + size > DMAR_REG_SIZE) { + VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64 + ", got 0x%"PRIx64 " %d", + (uint64_t)DMAR_REG_SIZE, addr, size); + return; + } + + switch (addr) { + /* Global Command Register, 32-bit */ + case DMAR_GCMD_REG: + VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + vtd_set_long(s, addr, val); + vtd_handle_gcmd_write(s); + break; + + /* Context Command Register, 64-bit */ + case DMAR_CCMD_REG: + VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + vtd_handle_ccmd_write(s); + } + break; + + case DMAR_CCMD_REG_HI: + VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_ccmd_write(s); + break; + + /* IOTLB Invalidation Register, 64-bit */ + case DMAR_IOTLB_REG: + VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + vtd_handle_iotlb_write(s); + } + break; + + case DMAR_IOTLB_REG_HI: + VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_iotlb_write(s); + break; + + /* Invalidate Address Register, 64-bit */ + case DMAR_IVA_REG: + VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_IVA_REG_HI: + VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Fault Status Register, 32-bit */ + case DMAR_FSTS_REG: + VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_fsts_write(s); + break; + + /* Fault Event Control Register, 32-bit */ + case DMAR_FECTL_REG: + VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_fectl_write(s); + break; + + /* Fault Event Data Register, 32-bit */ + case DMAR_FEDATA_REG: + VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Fault Event Address Register, 32-bit */ + case DMAR_FEADDR_REG: + VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Fault Event Upper Address Register, 32-bit */ + case DMAR_FEUADDR_REG: + VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Protected Memory Enable Register, 32-bit */ + case DMAR_PMEN_REG: + VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Root Table Address Register, 64-bit */ + case DMAR_RTADDR_REG: + VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_RTADDR_REG_HI: + VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Invalidation Queue Tail Register, 64-bit */ + case DMAR_IQT_REG: + VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + vtd_handle_iqt_write(s); + break; + + case DMAR_IQT_REG_HI: + VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + /* 19:63 of IQT_REG is RsvdZ, do nothing here */ + break; + + /* Invalidation Queue Address Register, 64-bit */ + case DMAR_IQA_REG: + VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_IQA_REG_HI: + VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Invalidation Completion Status Register, 32-bit */ + case DMAR_ICS_REG: + VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_ics_write(s); + break; + + /* Invalidation Event Control Register, 32-bit */ + case DMAR_IECTL_REG: + VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + vtd_handle_iectl_write(s); + break; + + /* Invalidation Event Data Register, 32-bit */ + case DMAR_IEDATA_REG: + VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Invalidation Event Address Register, 32-bit */ + case DMAR_IEADDR_REG: + VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Invalidation Event Upper Address Register, 32-bit */ + case DMAR_IEUADDR_REG: + VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + /* Fault Recording Registers, 128-bit */ + case DMAR_FRCD_REG_0_0: + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_FRCD_REG_0_1: + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + + case DMAR_FRCD_REG_0_2: + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + /* May clear bit 127 (Fault), update PPF */ + vtd_update_fsts_ppf(s); + } + break; + + case DMAR_FRCD_REG_0_3: + VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + /* May clear bit 127 (Fault), update PPF */ + vtd_update_fsts_ppf(s); + break; + + default: + VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + } +} + +static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, + bool is_write) +{ + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + if (!s->dmar_enabled) { + /* DMAR disabled, passthrough, use 4k-page*/ + ret.iova = addr & VTD_PAGE_MASK_4K; + ret.translated_addr = addr & VTD_PAGE_MASK_4K; + ret.addr_mask = ~VTD_PAGE_MASK_4K; + ret.perm = IOMMU_RW; + return ret; + } + + vtd_do_iommu_translate(vtd_as, vtd_as->bus_num, vtd_as->devfn, addr, + is_write, &ret); + VTD_DPRINTF(MMU, + "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8 + " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, vtd_as->bus_num, + VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), + vtd_as->devfn, addr, ret.translated_addr); + return ret; +} + +static const VMStateDescription vtd_vmstate = { + .name = "iommu-intel", + .unmigratable = 1, +}; + +static const MemoryRegionOps vtd_mem_ops = { + .read = vtd_mem_read, + .write = vtd_mem_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static Property vtd_properties[] = { + DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +/* Do the initialization. It will also be called when reset, so pay + * attention when adding new initialization stuff. + */ +static void vtd_init(IntelIOMMUState *s) +{ + memset(s->csr, 0, DMAR_REG_SIZE); + memset(s->wmask, 0, DMAR_REG_SIZE); + memset(s->w1cmask, 0, DMAR_REG_SIZE); + memset(s->womask, 0, DMAR_REG_SIZE); + + s->iommu_ops.translate = vtd_iommu_translate; + s->root = 0; + s->root_extended = false; + s->dmar_enabled = false; + s->iq_head = 0; + s->iq_tail = 0; + s->iq = 0; + s->iq_size = 0; + s->qi_enabled = false; + s->iq_last_desc_type = VTD_INV_DESC_NONE; + s->next_frcd_reg = 0; + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | + VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI; + s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + + vtd_reset_context_cache(s); + vtd_reset_iotlb(s); + + /* Define registers with default values and bit semantics */ + vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); + vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); + vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); + vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); + vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0); + vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0); + vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL); + + /* Advanced Fault Logging not supported */ + vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL); + vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0); + vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0); + vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0); + + /* Treated as RsvdZ when EIM in ECAP_REG is not supported + * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0); + */ + vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0); + + /* Treated as RO for implementations that PLMR and PHMR fields reported + * as Clear in the CAP_REG. + * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0); + */ + vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0); + + vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0); + vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0); + vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0); + vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL); + vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0); + vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0); + vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0); + /* Treadted as RsvdZ when EIM in ECAP_REG is not supported */ + vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0); + + /* IOTLB registers */ + vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0Xb003ffff00000000ULL, 0); + vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0); + vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL); + + /* Fault Recording Registers, 128-bit */ + vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0); + vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL); +} + +/* Should not reset address_spaces when reset because devices will still use + * the address space they got at first (won't ask the bus again). + */ +static void vtd_reset(DeviceState *dev) +{ + IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + + VTD_DPRINTF(GENERAL, ""); + vtd_init(s); +} + +static void vtd_realize(DeviceState *dev, Error **errp) +{ + IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + + VTD_DPRINTF(GENERAL, ""); + memset(s->address_spaces, 0, sizeof(s->address_spaces)); + memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, + "intel_iommu", DMAR_REG_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); + /* No corresponding destroy */ + s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, + g_free, g_free); + vtd_init(s); +} + +static void vtd_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = vtd_reset; + dc->realize = vtd_realize; + dc->vmsd = &vtd_vmstate; + dc->props = vtd_properties; +} + +static const TypeInfo vtd_info = { + .name = TYPE_INTEL_IOMMU_DEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(IntelIOMMUState), + .class_init = vtd_class_init, +}; + +static void vtd_register_types(void) +{ + VTD_DPRINTF(GENERAL, ""); + type_register_static(&vtd_info); +} + +type_init(vtd_register_types) diff --git a/qemu/hw/i386/intel_iommu_internal.h b/qemu/hw/i386/intel_iommu_internal.h new file mode 100644 index 000000000..ba288ab1d --- /dev/null +++ b/qemu/hw/i386/intel_iommu_internal.h @@ -0,0 +1,389 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com> + * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Lots of defines copied from kernel/include/linux/intel-iommu.h: + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj <ashok.raj@intel.com> + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + */ + +#ifndef HW_I386_INTEL_IOMMU_INTERNAL_H +#define HW_I386_INTEL_IOMMU_INTERNAL_H +#include "hw/i386/intel_iommu.h" + +/* + * Intel IOMMU register specification + */ +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_CAP_REG_HI 0xc /* High 32-bit of DMAR_CAP_REG */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_ECAP_REG_HI 0X14 +#define DMAR_GCMD_REG 0x18 /* Global command */ +#define DMAR_GSTS_REG 0x1c /* Global status */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_RTADDR_REG_HI 0X24 +#define DMAR_CCMD_REG 0x28 /* Context command */ +#define DMAR_CCMD_REG_HI 0x2c +#define DMAR_FSTS_REG 0x34 /* Fault status */ +#define DMAR_FECTL_REG 0x38 /* Fault control */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address */ +#define DMAR_AFLOG_REG 0x58 /* Advanced fault control */ +#define DMAR_AFLOG_REG_HI 0X5c +#define DMAR_PMEN_REG 0x64 /* Enable protected memory region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* PMRR high base addr */ +#define DMAR_PHMBASE_REG_HI 0X74 +#define DMAR_PHMLIMIT_REG 0x78 /* PMRR high limit */ +#define DMAR_PHMLIMIT_REG_HI 0x7c +#define DMAR_IQH_REG 0x80 /* Invalidation queue head */ +#define DMAR_IQH_REG_HI 0X84 +#define DMAR_IQT_REG 0x88 /* Invalidation queue tail */ +#define DMAR_IQT_REG_HI 0X8c +#define DMAR_IQA_REG 0x90 /* Invalidation queue addr */ +#define DMAR_IQA_REG_HI 0x94 +#define DMAR_ICS_REG 0x9c /* Invalidation complete status */ +#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr */ +#define DMAR_IRTA_REG_HI 0xbc +#define DMAR_IECTL_REG 0xa0 /* Invalidation event control */ +#define DMAR_IEDATA_REG 0xa4 /* Invalidation event data */ +#define DMAR_IEADDR_REG 0xa8 /* Invalidation event address */ +#define DMAR_IEUADDR_REG 0xac /* Invalidation event address */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head */ +#define DMAR_PQH_REG_HI 0xc4 +#define DMAR_PQT_REG 0xc8 /* Page request queue tail*/ +#define DMAR_PQT_REG_HI 0xcc +#define DMAR_PQA_REG 0xd0 /* Page request queue address */ +#define DMAR_PQA_REG_HI 0xd4 +#define DMAR_PRS_REG 0xdc /* Page request status */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event data */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event address */ +#define DMAR_PEUADDR_REG 0xec /* Page event upper address */ +#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability */ +#define DMAR_MTRRCAP_REG_HI 0x104 +#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type */ +#define DMAR_MTRRDEF_REG_HI 0x10c + +/* IOTLB registers */ +#define DMAR_IOTLB_REG_OFFSET 0xf0 /* Offset to the IOTLB registers */ +#define DMAR_IVA_REG DMAR_IOTLB_REG_OFFSET /* Invalidate address */ +#define DMAR_IVA_REG_HI (DMAR_IVA_REG + 4) +/* IOTLB invalidate register */ +#define DMAR_IOTLB_REG (DMAR_IOTLB_REG_OFFSET + 0x8) +#define DMAR_IOTLB_REG_HI (DMAR_IOTLB_REG + 4) + +/* FRCD */ +#define DMAR_FRCD_REG_OFFSET 0x220 /* Offset to the fault recording regs */ +/* NOTICE: If you change the DMAR_FRCD_REG_NR, please remember to change the + * DMAR_REG_SIZE in include/hw/i386/intel_iommu.h. + * #define DMAR_REG_SIZE (DMAR_FRCD_REG_OFFSET + 16 * DMAR_FRCD_REG_NR) + */ +#define DMAR_FRCD_REG_NR 1ULL /* Num of fault recording regs */ + +#define DMAR_FRCD_REG_0_0 0x220 /* The 0th fault recording regs */ +#define DMAR_FRCD_REG_0_1 0x224 +#define DMAR_FRCD_REG_0_2 0x228 +#define DMAR_FRCD_REG_0_3 0x22c + +/* Interrupt Address Range */ +#define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL +#define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL + +/* The shift of source_id in the key of IOTLB hash table */ +#define VTD_IOTLB_SID_SHIFT 36 +#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ + +/* IOTLB_REG */ +#define VTD_TLB_GLOBAL_FLUSH (1ULL << 60) /* Global invalidation */ +#define VTD_TLB_DSI_FLUSH (2ULL << 60) /* Domain-selective */ +#define VTD_TLB_PSI_FLUSH (3ULL << 60) /* Page-selective */ +#define VTD_TLB_FLUSH_GRANU_MASK (3ULL << 60) +#define VTD_TLB_GLOBAL_FLUSH_A (1ULL << 57) +#define VTD_TLB_DSI_FLUSH_A (2ULL << 57) +#define VTD_TLB_PSI_FLUSH_A (3ULL << 57) +#define VTD_TLB_FLUSH_GRANU_MASK_A (3ULL << 57) +#define VTD_TLB_IVT (1ULL << 63) +#define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK) + +/* IVA_REG */ +#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1)) +#define VTD_IVA_AM(val) ((val) & 0x3fULL) + +/* GCMD_REG */ +#define VTD_GCMD_TE (1UL << 31) +#define VTD_GCMD_SRTP (1UL << 30) +#define VTD_GCMD_SFL (1UL << 29) +#define VTD_GCMD_EAFL (1UL << 28) +#define VTD_GCMD_WBF (1UL << 27) +#define VTD_GCMD_QIE (1UL << 26) +#define VTD_GCMD_IRE (1UL << 25) +#define VTD_GCMD_SIRTP (1UL << 24) +#define VTD_GCMD_CFI (1UL << 23) + +/* GSTS_REG */ +#define VTD_GSTS_TES (1UL << 31) +#define VTD_GSTS_RTPS (1UL << 30) +#define VTD_GSTS_FLS (1UL << 29) +#define VTD_GSTS_AFLS (1UL << 28) +#define VTD_GSTS_WBFS (1UL << 27) +#define VTD_GSTS_QIES (1UL << 26) +#define VTD_GSTS_IRES (1UL << 25) +#define VTD_GSTS_IRTPS (1UL << 24) +#define VTD_GSTS_CFIS (1UL << 23) + +/* CCMD_REG */ +#define VTD_CCMD_ICC (1ULL << 63) +#define VTD_CCMD_GLOBAL_INVL (1ULL << 61) +#define VTD_CCMD_DOMAIN_INVL (2ULL << 61) +#define VTD_CCMD_DEVICE_INVL (3ULL << 61) +#define VTD_CCMD_CIRG_MASK (3ULL << 61) +#define VTD_CCMD_GLOBAL_INVL_A (1ULL << 59) +#define VTD_CCMD_DOMAIN_INVL_A (2ULL << 59) +#define VTD_CCMD_DEVICE_INVL_A (3ULL << 59) +#define VTD_CCMD_CAIG_MASK (3ULL << 59) +#define VTD_CCMD_DID(val) ((val) & VTD_DOMAIN_ID_MASK) +#define VTD_CCMD_SID(val) (((val) >> 16) & 0xffffULL) +#define VTD_CCMD_FM(val) (((val) >> 32) & 3ULL) + +/* RTADDR_REG */ +#define VTD_RTADDR_RTT (1ULL << 11) +#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) + +/* ECAP_REG */ +/* (offset >> 4) << 8 */ +#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) +#define VTD_ECAP_QI (1ULL << 1) + +/* CAP_REG */ +/* (offset >> 4) << 24 */ +#define VTD_CAP_FRO (DMAR_FRCD_REG_OFFSET << 20) +#define VTD_CAP_NFR ((DMAR_FRCD_REG_NR - 1) << 40) +#define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ +#define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) +#define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) +#define VTD_MGAW 39 /* Maximum Guest Address Width */ +#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) +#define VTD_MAMV 9ULL +#define VTD_CAP_MAMV (VTD_MAMV << 48) +#define VTD_CAP_PSI (1ULL << 39) + +/* Supported Adjusted Guest Address Widths */ +#define VTD_CAP_SAGAW_SHIFT 8 +#define VTD_CAP_SAGAW_MASK (0x1fULL << VTD_CAP_SAGAW_SHIFT) + /* 39-bit AGAW, 3-level page-table */ +#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) + /* 48-bit AGAW, 4-level page-table */ +#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) +#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit + +/* IQT_REG */ +#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) + +/* IQA_REG */ +#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IQA_QS 0x7ULL + +/* IQH_REG */ +#define VTD_IQH_QH_SHIFT 4 +#define VTD_IQH_QH_MASK 0x7fff0ULL + +/* ICS_REG */ +#define VTD_ICS_IWC 1UL + +/* IECTL_REG */ +#define VTD_IECTL_IM (1UL << 31) +#define VTD_IECTL_IP (1UL << 30) + +/* FSTS_REG */ +#define VTD_FSTS_FRI_MASK 0xff00UL +#define VTD_FSTS_FRI(val) ((((uint32_t)(val)) << 8) & VTD_FSTS_FRI_MASK) +#define VTD_FSTS_IQE (1UL << 4) +#define VTD_FSTS_PPF (1UL << 1) +#define VTD_FSTS_PFO 1UL + +/* FECTL_REG */ +#define VTD_FECTL_IM (1UL << 31) +#define VTD_FECTL_IP (1UL << 30) + +/* Fault Recording Register */ +/* For the high 64-bit of 128-bit */ +#define VTD_FRCD_F (1ULL << 63) +#define VTD_FRCD_T (1ULL << 62) +#define VTD_FRCD_FR(val) (((val) & 0xffULL) << 32) +#define VTD_FRCD_SID_MASK 0xffffULL +#define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) +/* For the low 64-bit of 128-bit */ +#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) + +/* DMA Remapping Fault Conditions */ +typedef enum VTDFaultReason { + VTD_FR_RESERVED = 0, /* Reserved for Advanced Fault logging */ + VTD_FR_ROOT_ENTRY_P = 1, /* The Present(P) field of root-entry is 0 */ + VTD_FR_CONTEXT_ENTRY_P, /* The Present(P) field of context-entry is 0 */ + VTD_FR_CONTEXT_ENTRY_INV, /* Invalid programming of a context-entry */ + VTD_FR_ADDR_BEYOND_MGAW, /* Input-address above (2^x-1) */ + VTD_FR_WRITE, /* No write permission */ + VTD_FR_READ, /* No read permission */ + /* Fail to access a second-level paging entry (not SL_PML4E) */ + VTD_FR_PAGING_ENTRY_INV, + VTD_FR_ROOT_TABLE_INV, /* Fail to access a root-entry */ + VTD_FR_CONTEXT_TABLE_INV, /* Fail to access a context-entry */ + /* Non-zero reserved field in a present root-entry */ + VTD_FR_ROOT_ENTRY_RSVD, + /* Non-zero reserved field in a present context-entry */ + VTD_FR_CONTEXT_ENTRY_RSVD, + /* Non-zero reserved field in a second-level paging entry with at lease one + * Read(R) and Write(W) or Execute(E) field is Set. + */ + VTD_FR_PAGING_ENTRY_RSVD, + /* Translation request or translated request explicitly blocked dut to the + * programming of the Translation Type (T) field in the present + * context-entry. + */ + VTD_FR_CONTEXT_ENTRY_TT, + /* This is not a normal fault reason. We use this to indicate some faults + * that are not referenced by the VT-d specification. + * Fault event with such reason should not be recorded. + */ + VTD_FR_RESERVED_ERR, + VTD_FR_MAX, /* Guard */ +} VTDFaultReason; + +#define VTD_CONTEXT_CACHE_GEN_MAX 0xffffffffUL + +/* Queued Invalidation Descriptor */ +struct VTDInvDesc { + uint64_t lo; + uint64_t hi; +}; +typedef struct VTDInvDesc VTDInvDesc; + +/* Masks for struct VTDInvDesc */ +#define VTD_INV_DESC_TYPE 0xf +#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ +#define VTD_INV_DESC_IOTLB 0x2 +#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ +#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ + +/* Masks for Invalidation Wait Descriptor*/ +#define VTD_INV_DESC_WAIT_SW (1ULL << 5) +#define VTD_INV_DESC_WAIT_IF (1ULL << 4) +#define VTD_INV_DESC_WAIT_FN (1ULL << 6) +#define VTD_INV_DESC_WAIT_DATA_SHIFT 32 +#define VTD_INV_DESC_WAIT_RSVD_LO 0Xffffff80ULL +#define VTD_INV_DESC_WAIT_RSVD_HI 3ULL + +/* Masks for Context-cache Invalidation Descriptor */ +#define VTD_INV_DESC_CC_G (3ULL << 4) +#define VTD_INV_DESC_CC_GLOBAL (1ULL << 4) +#define VTD_INV_DESC_CC_DOMAIN (2ULL << 4) +#define VTD_INV_DESC_CC_DEVICE (3ULL << 4) +#define VTD_INV_DESC_CC_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) +#define VTD_INV_DESC_CC_SID(val) (((val) >> 32) & 0xffffUL) +#define VTD_INV_DESC_CC_FM(val) (((val) >> 48) & 3UL) +#define VTD_INV_DESC_CC_RSVD 0xfffc00000000ffc0ULL + +/* Masks for IOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_IOTLB_G (3ULL << 4) +#define VTD_INV_DESC_IOTLB_GLOBAL (1ULL << 4) +#define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4) +#define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4) +#define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) +#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \ + ((1ULL << VTD_MGAW) - 1)) +#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) +#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL +#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL + +/* Information about page-selective IOTLB invalidate */ +struct VTDIOTLBPageInvInfo { + uint16_t domain_id; + uint64_t gfn; + uint8_t mask; +}; +typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo; + +/* Pagesize of VTD paging structures, including root and context tables */ +#define VTD_PAGE_SHIFT 12 +#define VTD_PAGE_SIZE (1ULL << VTD_PAGE_SHIFT) + +#define VTD_PAGE_SHIFT_4K 12 +#define VTD_PAGE_MASK_4K (~((1ULL << VTD_PAGE_SHIFT_4K) - 1)) +#define VTD_PAGE_SHIFT_2M 21 +#define VTD_PAGE_MASK_2M (~((1ULL << VTD_PAGE_SHIFT_2M) - 1)) +#define VTD_PAGE_SHIFT_1G 30 +#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1)) + +struct VTDRootEntry { + uint64_t val; + uint64_t rsvd; +}; +typedef struct VTDRootEntry VTDRootEntry; + +/* Masks for struct VTDRootEntry */ +#define VTD_ROOT_ENTRY_P 1ULL +#define VTD_ROOT_ENTRY_CTP (~0xfffULL) + +#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) +#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) + +/* Masks for struct VTDContextEntry */ +/* lo */ +#define VTD_CONTEXT_ENTRY_P (1ULL << 0) +#define VTD_CONTEXT_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ +#define VTD_CONTEXT_ENTRY_TT (3ULL << 2) /* Translation Type */ +#define VTD_CONTEXT_TT_MULTI_LEVEL 0 +#define VTD_CONTEXT_TT_DEV_IOTLB 1 +#define VTD_CONTEXT_TT_PASS_THROUGH 2 +/* Second Level Page Translation Pointer*/ +#define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) +#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) +/* hi */ +#define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */ +#define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK) +#define VTD_CONTEXT_ENTRY_RSVD_HI 0xffffffffff000080ULL + +#define VTD_CONTEXT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDContextEntry)) + +/* Paging Structure common */ +#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) +/* Bits to decide the offset for each level */ +#define VTD_SL_LEVEL_BITS 9 + +/* Second Level Paging Structure */ +#define VTD_SL_PML4_LEVEL 4 +#define VTD_SL_PDP_LEVEL 3 +#define VTD_SL_PD_LEVEL 2 +#define VTD_SL_PT_LEVEL 1 +#define VTD_SL_PT_ENTRY_NR 512 + +/* Masks for Second Level Paging Entry */ +#define VTD_SL_RW_MASK 3ULL +#define VTD_SL_R 1ULL +#define VTD_SL_W (1ULL << 1) +#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) +#define VTD_SL_IGN_COM 0xbff0000000000000ULL + +#endif diff --git a/qemu/hw/i386/kvm/Makefile.objs b/qemu/hw/i386/kvm/Makefile.objs new file mode 100644 index 000000000..d8bce209b --- /dev/null +++ b/qemu/hw/i386/kvm/Makefile.objs @@ -0,0 +1 @@ +obj-y += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o diff --git a/qemu/hw/i386/kvm/apic.c b/qemu/hw/i386/kvm/apic.c new file mode 100644 index 000000000..5b470562a --- /dev/null +++ b/qemu/hw/i386/kvm/apic.c @@ -0,0 +1,218 @@ +/* + * KVM in-kernel APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "hw/i386/apic_internal.h" +#include "hw/pci/msi.h" +#include "sysemu/kvm.h" + +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; +} + +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, + int reg_id) +{ + return *((uint32_t *)(kapic->regs + (reg_id << 4))); +} + +void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) +{ + APICCommonState *s = APIC_COMMON(dev); + int i; + + memset(kapic, 0, sizeof(*kapic)); + kvm_apic_set_reg(kapic, 0x2, s->id << 24); + kvm_apic_set_reg(kapic, 0x8, s->tpr); + kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24); + kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kvm_apic_set_reg(kapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kvm_apic_set_reg(kapic, 0x10 + i, s->isr[i]); + kvm_apic_set_reg(kapic, 0x18 + i, s->tmr[i]); + kvm_apic_set_reg(kapic, 0x20 + i, s->irr[i]); + } + kvm_apic_set_reg(kapic, 0x28, s->esr); + kvm_apic_set_reg(kapic, 0x30, s->icr[0]); + kvm_apic_set_reg(kapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) { + kvm_apic_set_reg(kapic, 0x32 + i, s->lvt[i]); + } + kvm_apic_set_reg(kapic, 0x38, s->initial_count); + kvm_apic_set_reg(kapic, 0x3e, s->divide_conf); +} + +void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) +{ + APICCommonState *s = APIC_COMMON(dev); + int i, v; + + s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; + s->tpr = kvm_apic_get_reg(kapic, 0x8); + s->arb_id = kvm_apic_get_reg(kapic, 0x9); + s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24; + s->dest_mode = kvm_apic_get_reg(kapic, 0xe) >> 28; + s->spurious_vec = kvm_apic_get_reg(kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kvm_apic_get_reg(kapic, 0x10 + i); + s->tmr[i] = kvm_apic_get_reg(kapic, 0x18 + i); + s->irr[i] = kvm_apic_get_reg(kapic, 0x20 + i); + } + s->esr = kvm_apic_get_reg(kapic, 0x28); + s->icr[0] = kvm_apic_get_reg(kapic, 0x30); + s->icr[1] = kvm_apic_get_reg(kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kvm_apic_get_reg(kapic, 0x32 + i); + } + s->initial_count = kvm_apic_get_reg(kapic, 0x38); + s->divide_conf = kvm_apic_get_reg(kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static void kvm_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = (val & 0x0f) << 4; +} + +static uint8_t kvm_apic_get_tpr(APICCommonState *s) +{ + return s->tpr >> 4; +} + +static void kvm_apic_enable_tpr_reporting(APICCommonState *s, bool enable) +{ + struct kvm_tpr_access_ctl ctl = { + .enabled = enable + }; + + kvm_vcpu_ioctl(CPU(s->cpu), KVM_TPR_ACCESS_REPORTING, &ctl); +} + +static void kvm_apic_vapic_base_update(APICCommonState *s) +{ + struct kvm_vapic_addr vapid_addr = { + .vapic_addr = s->vapic_paddr, + }; + int ret; + + ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_VAPIC_ADDR, &vapid_addr); + if (ret < 0) { + fprintf(stderr, "KVM: setting VAPIC address failed (%s)\n", + strerror(-ret)); + abort(); + } +} + +static void do_inject_external_nmi(void *data) +{ + APICCommonState *s = data; + CPUState *cpu = CPU(s->cpu); + uint32_t lvt; + int ret; + + cpu_synchronize_state(cpu); + + lvt = s->lvt[APIC_LVT_LINT1]; + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { + ret = kvm_vcpu_ioctl(cpu, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } +} + +static void kvm_apic_external_nmi(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s); +} + +static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void kvm_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + int ret; + + ret = kvm_irqchip_send_msi(kvm_state, msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } +} + +static const MemoryRegionOps kvm_apic_io_ops = { + .read = kvm_apic_mem_read, + .write = kvm_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void kvm_apic_reset(APICCommonState *s) +{ + /* Not used by KVM, which uses the CPU mp_state instead. */ + s->wait_for_sipi = 0; +} + +static void kvm_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", + APIC_SPACE_SIZE); + + if (kvm_has_gsi_routing()) { + msi_supported = true; + } +} + +static void kvm_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = kvm_apic_realize; + k->reset = kvm_apic_reset; + k->set_base = kvm_apic_set_base; + k->set_tpr = kvm_apic_set_tpr; + k->get_tpr = kvm_apic_get_tpr; + k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting; + k->vapic_base_update = kvm_apic_vapic_base_update; + k->external_nmi = kvm_apic_external_nmi; +} + +static const TypeInfo kvm_apic_info = { + .name = "kvm-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = kvm_apic_class_init, +}; + +static void kvm_apic_register_types(void) +{ + type_register_static(&kvm_apic_info); +} + +type_init(kvm_apic_register_types) diff --git a/qemu/hw/i386/kvm/clock.c b/qemu/hw/i386/kvm/clock.c new file mode 100644 index 000000000..efdf16584 --- /dev/null +++ b/qemu/hw/i386/kvm/clock.c @@ -0,0 +1,209 @@ +/* + * QEMU KVM support, paravirtual clock device + * + * Copyright (C) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" +#include "qemu/host-utils.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "sysemu/cpus.h" +#include "hw/sysbus.h" +#include "hw/kvm/clock.h" + +#include <linux/kvm.h> +#include <linux/kvm_para.h> + +#define TYPE_KVM_CLOCK "kvmclock" +#define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK) + +typedef struct KVMClockState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + uint64_t clock; + bool clock_valid; +} KVMClockState; + +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +static uint64_t kvmclock_current_nsec(KVMClockState *s) +{ + CPUState *cpu = first_cpu; + CPUX86State *env = cpu->env_ptr; + hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL; + uint64_t migration_tsc = env->tsc; + struct pvclock_vcpu_time_info time; + uint64_t delta; + uint64_t nsec_lo; + uint64_t nsec_hi; + uint64_t nsec; + + if (!(env->system_time_msr & 1ULL)) { + /* KVM clock not active */ + return 0; + } + + cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + + assert(time.tsc_timestamp <= migration_tsc); + delta = migration_tsc - time.tsc_timestamp; + if (time.tsc_shift < 0) { + delta >>= -time.tsc_shift; + } else { + delta <<= time.tsc_shift; + } + + mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); + nsec = (nsec_lo >> 32) | (nsec_hi << 32); + return nsec + time.system_time; +} + +static void kvmclock_vm_state_change(void *opaque, int running, + RunState state) +{ + KVMClockState *s = opaque; + CPUState *cpu; + int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL); + int ret; + + if (running) { + struct kvm_clock_data data = {}; + uint64_t time_at_migration = kvmclock_current_nsec(s); + + s->clock_valid = false; + + /* We can't rely on the migrated clock value, just discard it */ + if (time_at_migration) { + s->clock = time_at_migration; + } + + data.clock = s->clock; + ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + + if (!cap_clock_ctrl) { + return; + } + CPU_FOREACH(cpu) { + ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0); + if (ret) { + if (ret != -EINVAL) { + fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); + } + return; + } + } + } else { + struct kvm_clock_data data; + int ret; + + if (s->clock_valid) { + return; + } + + cpu_synchronize_all_states(); + /* In theory, the cpu_synchronize_all_states() call above wouldn't + * affect the rest of the code, as the VCPU state inside CPUState + * is supposed to always match the VCPU state on the kernel side. + * + * In practice, calling cpu_synchronize_state() too soon will load the + * kernel-side APIC state into X86CPU.apic_state too early, APIC state + * won't be reloaded later because CPUState.vcpu_dirty==true, and + * outdated APIC state may be migrated to another host. + * + * The real fix would be to make sure outdated APIC state is read + * from the kernel again when necessary. While this is not fixed, we + * need the cpu_clean_all_dirty() call below. + */ + cpu_clean_all_dirty(); + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + s->clock = data.clock; + + /* + * If the VM is stopped, declare the clock state valid to + * avoid re-reading it on next vmsave (which would return + * a different value). Will be reset when the VM is continued. + */ + s->clock_valid = true; + } +} + +static void kvmclock_realize(DeviceState *dev, Error **errp) +{ + KVMClockState *s = KVM_CLOCK(dev); + + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); +} + +static const VMStateDescription kvmclock_vmsd = { + .name = "kvmclock", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static void kvmclock_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = kvmclock_realize; + dc->vmsd = &kvmclock_vmsd; +} + +static const TypeInfo kvmclock_info = { + .name = TYPE_KVM_CLOCK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(KVMClockState), + .class_init = kvmclock_class_init, +}; + +/* Note: Must be called after VCPU initialization. */ +void kvmclock_create(void) +{ + X86CPU *cpu = X86_CPU(first_cpu); + + if (kvm_enabled() && + cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) | + (1ULL << KVM_FEATURE_CLOCKSOURCE2))) { + sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL); + } +} + +static void kvmclock_register_types(void) +{ + type_register_static(&kvmclock_info); +} + +type_init(kvmclock_register_types) diff --git a/qemu/hw/i386/kvm/i8254.c b/qemu/hw/i386/kvm/i8254.c new file mode 100644 index 000000000..90eea10df --- /dev/null +++ b/qemu/hw/i386/kvm/i8254.c @@ -0,0 +1,335 @@ +/* + * KVM in-kernel PIT (i8254) support + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2012 Jan Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "hw/timer/i8254.h" +#include "hw/timer/i8254_internal.h" +#include "sysemu/kvm.h" + +#define KVM_PIT_REINJECT_BIT 0 + +#define CALIBRATION_ROUNDS 3 + +#define KVM_PIT(obj) OBJECT_CHECK(KVMPITState, (obj), TYPE_KVM_I8254) +#define KVM_PIT_CLASS(class) \ + OBJECT_CLASS_CHECK(KVMPITClass, (class), TYPE_KVM_I8254) +#define KVM_PIT_GET_CLASS(obj) \ + OBJECT_GET_CLASS(KVMPITClass, (obj), TYPE_KVM_I8254) + +typedef struct KVMPITState { + PITCommonState parent_obj; + + LostTickPolicy lost_tick_policy; + bool vm_stopped; + int64_t kernel_clock_offset; +} KVMPITState; + +typedef struct KVMPITClass { + PITCommonClass parent_class; + + DeviceRealize parent_realize; +} KVMPITClass; + +static int64_t abs64(int64_t v) +{ + return v < 0 ? -v : v; +} + +static void kvm_pit_update_clock_offset(KVMPITState *s) +{ + int64_t offset, clock_offset; + struct timespec ts; + int i; + + /* + * Measure the delta between CLOCK_MONOTONIC, the base used for + * kvm_pit_channel_state::count_load_time, and QEMU_CLOCK_VIRTUAL. Take the + * minimum of several samples to filter out scheduling noise. + */ + clock_offset = INT64_MAX; + for (i = 0; i < CALIBRATION_ROUNDS; i++) { + offset = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + clock_gettime(CLOCK_MONOTONIC, &ts); + offset -= ts.tv_nsec; + offset -= (int64_t)ts.tv_sec * 1000000000; + if (abs64(offset) < abs64(clock_offset)) { + clock_offset = offset; + } + } + s->kernel_clock_offset = clock_offset; +} + +static void kvm_pit_get(PITCommonState *pit) +{ + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_state2 kpit; + struct kvm_pit_channel_state *kchan; + struct PITChannelState *sc; + int i, ret; + + /* No need to re-read the state if VM is stopped. */ + if (s->vm_stopped) { + return; + } + + if (kvm_has_pit_state2()) { + ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit); + if (ret < 0) { + fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(ret)); + abort(); + } + pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY; + } else { + /* + * kvm_pit_state2 is superset of kvm_pit_state struct, + * so we can use it for KVM_GET_PIT as well. + */ + ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT, &kpit); + if (ret < 0) { + fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(ret)); + abort(); + } + } + for (i = 0; i < 3; i++) { + kchan = &kpit.channels[i]; + sc = &pit->channels[i]; + sc->count = kchan->count; + sc->latched_count = kchan->latched_count; + sc->count_latched = kchan->count_latched; + sc->status_latched = kchan->status_latched; + sc->status = kchan->status; + sc->read_state = kchan->read_state; + sc->write_state = kchan->write_state; + sc->write_latch = kchan->write_latch; + sc->rw_mode = kchan->rw_mode; + sc->mode = kchan->mode; + sc->bcd = kchan->bcd; + sc->gate = kchan->gate; + sc->count_load_time = kchan->count_load_time + s->kernel_clock_offset; + } + + sc = &pit->channels[0]; + sc->next_transition_time = + pit_get_next_transition_time(sc, sc->count_load_time); +} + +static void kvm_pit_put(PITCommonState *pit) +{ + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_state2 kpit = {}; + struct kvm_pit_channel_state *kchan; + struct PITChannelState *sc; + int i, ret; + + /* The offset keeps changing as long as the VM is stopped. */ + if (s->vm_stopped) { + kvm_pit_update_clock_offset(s); + } + + kpit.flags = pit->channels[0].irq_disabled ? KVM_PIT_FLAGS_HPET_LEGACY : 0; + for (i = 0; i < 3; i++) { + kchan = &kpit.channels[i]; + sc = &pit->channels[i]; + kchan->count = sc->count; + kchan->latched_count = sc->latched_count; + kchan->count_latched = sc->count_latched; + kchan->status_latched = sc->status_latched; + kchan->status = sc->status; + kchan->read_state = sc->read_state; + kchan->write_state = sc->write_state; + kchan->write_latch = sc->write_latch; + kchan->rw_mode = sc->rw_mode; + kchan->mode = sc->mode; + kchan->bcd = sc->bcd; + kchan->gate = sc->gate; + kchan->count_load_time = sc->count_load_time - s->kernel_clock_offset; + } + + ret = kvm_vm_ioctl(kvm_state, + kvm_has_pit_state2() ? KVM_SET_PIT2 : KVM_SET_PIT, + &kpit); + if (ret < 0) { + fprintf(stderr, "%s failed: %s\n", + kvm_has_pit_state2() ? "KVM_SET_PIT2" : "KVM_SET_PIT", + strerror(ret)); + abort(); + } +} + +static void kvm_pit_set_gate(PITCommonState *s, PITChannelState *sc, int val) +{ + kvm_pit_get(s); + + switch (sc->mode) { + default: + case 0: + case 4: + /* XXX: just disable/enable counting */ + break; + case 1: + case 2: + case 3: + case 5: + if (sc->gate < val) { + /* restart counting on rising edge */ + sc->count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + break; + } + sc->gate = val; + + kvm_pit_put(s); +} + +static void kvm_pit_get_channel_info(PITCommonState *s, PITChannelState *sc, + PITChannelInfo *info) +{ + kvm_pit_get(s); + + pit_get_channel_info_common(s, sc, info); +} + +static void kvm_pit_reset(DeviceState *dev) +{ + PITCommonState *s = PIT_COMMON(dev); + + pit_reset_common(s); + + kvm_pit_put(s); +} + +static void kvm_pit_irq_control(void *opaque, int n, int enable) +{ + PITCommonState *pit = opaque; + PITChannelState *s = &pit->channels[0]; + + kvm_pit_get(pit); + + s->irq_disabled = !enable; + + kvm_pit_put(pit); +} + +static void kvm_pit_vm_state_change(void *opaque, int running, + RunState state) +{ + KVMPITState *s = opaque; + + if (running) { + kvm_pit_update_clock_offset(s); + kvm_pit_put(PIT_COMMON(s)); + s->vm_stopped = false; + } else { + kvm_pit_update_clock_offset(s); + kvm_pit_get(PIT_COMMON(s)); + s->vm_stopped = true; + } +} + +static void kvm_pit_realizefn(DeviceState *dev, Error **errp) +{ + PITCommonState *pit = PIT_COMMON(dev); + KVMPITClass *kpc = KVM_PIT_GET_CLASS(dev); + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_config config = { + .flags = 0, + }; + int ret; + + if (kvm_check_extension(kvm_state, KVM_CAP_PIT2)) { + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT2, &config); + } else { + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); + } + if (ret < 0) { + error_setg(errp, "Create kernel PIC irqchip failed: %s", + strerror(ret)); + return; + } + switch (s->lost_tick_policy) { + case LOST_TICK_POLICY_DELAY: + break; /* enabled by default */ + case LOST_TICK_POLICY_DISCARD: + if (kvm_check_extension(kvm_state, KVM_CAP_REINJECT_CONTROL)) { + struct kvm_reinject_control control = { .pit_reinject = 0 }; + + ret = kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control); + if (ret < 0) { + error_setg(errp, + "Can't disable in-kernel PIT reinjection: %s", + strerror(ret)); + return; + } + } + break; + default: + error_setg(errp, "Lost tick policy not supported."); + return; + } + + memory_region_init_reservation(&pit->ioports, NULL, "kvm-pit", 4); + + qdev_init_gpio_in(dev, kvm_pit_irq_control, 1); + + qemu_add_vm_change_state_handler(kvm_pit_vm_state_change, s); + + kpc->parent_realize(dev, errp); +} + +static Property kvm_pit_properties[] = { + DEFINE_PROP_UINT32("iobase", PITCommonState, iobase, -1), + DEFINE_PROP_LOSTTICKPOLICY("lost_tick_policy", KVMPITState, + lost_tick_policy, LOST_TICK_POLICY_DELAY), + DEFINE_PROP_END_OF_LIST(), +}; + +static void kvm_pit_class_init(ObjectClass *klass, void *data) +{ + KVMPITClass *kpc = KVM_PIT_CLASS(klass); + PITCommonClass *k = PIT_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + kpc->parent_realize = dc->realize; + dc->realize = kvm_pit_realizefn; + k->set_channel_gate = kvm_pit_set_gate; + k->get_channel_info = kvm_pit_get_channel_info; + dc->reset = kvm_pit_reset; + dc->props = kvm_pit_properties; +} + +static const TypeInfo kvm_pit_info = { + .name = TYPE_KVM_I8254, + .parent = TYPE_PIT_COMMON, + .instance_size = sizeof(KVMPITState), + .class_init = kvm_pit_class_init, + .class_size = sizeof(KVMPITClass), +}; + +static void kvm_pit_register(void) +{ + type_register_static(&kvm_pit_info); +} + +type_init(kvm_pit_register) diff --git a/qemu/hw/i386/kvm/i8259.c b/qemu/hw/i386/kvm/i8259.c new file mode 100644 index 000000000..53e3ca8c6 --- /dev/null +++ b/qemu/hw/i386/kvm/i8259.c @@ -0,0 +1,162 @@ +/* + * KVM in-kernel PIC (i8259) support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ +#include "hw/isa/i8259_internal.h" +#include "hw/i386/apic_internal.h" +#include "sysemu/kvm.h" + +#define TYPE_KVM_I8259 "kvm-i8259" +#define KVM_PIC_CLASS(class) \ + OBJECT_CLASS_CHECK(KVMPICClass, (class), TYPE_KVM_I8259) +#define KVM_PIC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(KVMPICClass, (obj), TYPE_KVM_I8259) + +/** + * KVMPICClass: + * @parent_realize: The parent's realizefn. + */ +typedef struct KVMPICClass { + PICCommonClass parent_class; + + DeviceRealize parent_realize; +} KVMPICClass; + +static void kvm_pic_get(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + abort(); + } + + kpic = &chip.chip.pic; + + s->last_irr = kpic->last_irr; + s->irr = kpic->irr; + s->imr = kpic->imr; + s->isr = kpic->isr; + s->priority_add = kpic->priority_add; + s->irq_base = kpic->irq_base; + s->read_reg_select = kpic->read_reg_select; + s->poll = kpic->poll; + s->special_mask = kpic->special_mask; + s->init_state = kpic->init_state; + s->auto_eoi = kpic->auto_eoi; + s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi; + s->special_fully_nested_mode = kpic->special_fully_nested_mode; + s->init4 = kpic->init4; + s->elcr = kpic->elcr; + s->elcr_mask = kpic->elcr_mask; +} + +static void kvm_pic_put(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + + kpic = &chip.chip.pic; + + kpic->last_irr = s->last_irr; + kpic->irr = s->irr; + kpic->imr = s->imr; + kpic->isr = s->isr; + kpic->priority_add = s->priority_add; + kpic->irq_base = s->irq_base; + kpic->read_reg_select = s->read_reg_select; + kpic->poll = s->poll; + kpic->special_mask = s->special_mask; + kpic->init_state = s->init_state; + kpic->auto_eoi = s->auto_eoi; + kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi; + kpic->special_fully_nested_mode = s->special_fully_nested_mode; + kpic->init4 = s->init4; + kpic->elcr = s->elcr; + kpic->elcr_mask = s->elcr_mask; + + ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + abort(); + } +} + +static void kvm_pic_reset(DeviceState *dev) +{ + PICCommonState *s = PIC_COMMON(dev); + + s->elcr = 0; + pic_reset_common(s); + + kvm_pic_put(s); +} + +static void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + int delivered; + + delivered = kvm_set_irq(kvm_state, irq, level); + apic_report_irq_delivered(delivered); +} + +static void kvm_pic_realize(DeviceState *dev, Error **errp) +{ + PICCommonState *s = PIC_COMMON(dev); + KVMPICClass *kpc = KVM_PIC_GET_CLASS(dev); + + memory_region_init_reservation(&s->base_io, NULL, "kvm-pic", 2); + memory_region_init_reservation(&s->elcr_io, NULL, "kvm-elcr", 1); + + kpc->parent_realize(dev, errp); +} + +qemu_irq *kvm_i8259_init(ISABus *bus) +{ + i8259_init_chip(TYPE_KVM_I8259, bus, true); + i8259_init_chip(TYPE_KVM_I8259, bus, false); + + return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS); +} + +static void kvm_i8259_class_init(ObjectClass *klass, void *data) +{ + KVMPICClass *kpc = KVM_PIC_CLASS(klass); + PICCommonClass *k = PIC_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = kvm_pic_reset; + kpc->parent_realize = dc->realize; + dc->realize = kvm_pic_realize; + k->pre_save = kvm_pic_get; + k->post_load = kvm_pic_put; +} + +static const TypeInfo kvm_i8259_info = { + .name = TYPE_KVM_I8259, + .parent = TYPE_PIC_COMMON, + .instance_size = sizeof(PICCommonState), + .class_init = kvm_i8259_class_init, + .class_size = sizeof(KVMPICClass), +}; + +static void kvm_pic_register_types(void) +{ + type_register_static(&kvm_i8259_info); +} + +type_init(kvm_pic_register_types) diff --git a/qemu/hw/i386/kvm/ioapic.c b/qemu/hw/i386/kvm/ioapic.c new file mode 100644 index 000000000..d2a6c4cf6 --- /dev/null +++ b/qemu/hw/i386/kvm/ioapic.c @@ -0,0 +1,168 @@ +/* + * KVM in-kernel IOPIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ + +#include "hw/i386/pc.h" +#include "hw/i386/ioapic_internal.h" +#include "hw/i386/apic_internal.h" +#include "sysemu/kvm.h" + +/* PC Utility function */ +void kvm_pc_setup_irq_routing(bool pci_enabled) +{ + KVMState *s = kvm_state; + int i; + + if (kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { + for (i = 0; i < 8; ++i) { + if (i == 2) { + continue; + } + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + } + for (i = 8; i < 16; ++i) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + } + if (pci_enabled) { + for (i = 0; i < 24; ++i) { + if (i == 0) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + } else if (i != 2) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); + } + } + } + kvm_irqchip_commit_routes(s); + } +} + +void kvm_pc_gsi_handler(void *opaque, int n, int level) +{ + GSIState *s = opaque; + + if (n < ISA_NUM_IRQS) { + /* Kernel will forward to both PIC and IOAPIC */ + qemu_set_irq(s->i8259_irq[n], level); + } else { + qemu_set_irq(s->ioapic_irq[n], level); + } +} + +typedef struct KVMIOAPICState KVMIOAPICState; + +struct KVMIOAPICState { + IOAPICCommonState ioapic; + uint32_t kvm_gsi_base; +}; + +static void kvm_ioapic_get(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + abort(); + } + + kioapic = &chip.chip.ioapic; + + s->id = kioapic->id; + s->ioregsel = kioapic->ioregsel; + s->irr = kioapic->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + s->ioredtbl[i] = kioapic->redirtbl[i].bits; + } +} + +static void kvm_ioapic_put(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kioapic = &chip.chip.ioapic; + + kioapic->id = s->id; + kioapic->ioregsel = s->ioregsel; + kioapic->base_address = s->busdev.mmio[0].addr; + kioapic->irr = s->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + kioapic->redirtbl[i].bits = s->ioredtbl[i]; + } + + ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + abort(); + } +} + +static void kvm_ioapic_reset(DeviceState *dev) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + ioapic_reset_common(dev); + kvm_ioapic_put(s); +} + +static void kvm_ioapic_set_irq(void *opaque, int irq, int level) +{ + KVMIOAPICState *s = opaque; + int delivered; + + delivered = kvm_set_irq(kvm_state, s->kvm_gsi_base + irq, level); + apic_report_irq_delivered(delivered); +} + +static void kvm_ioapic_realize(DeviceState *dev, Error **errp) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + memory_region_init_reservation(&s->io_memory, NULL, "kvm-ioapic", 0x1000); + + qdev_init_gpio_in(dev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS); +} + +static Property kvm_ioapic_properties[] = { + DEFINE_PROP_UINT32("gsi_base", KVMIOAPICState, kvm_gsi_base, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void kvm_ioapic_class_init(ObjectClass *klass, void *data) +{ + IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = kvm_ioapic_realize; + k->pre_save = kvm_ioapic_get; + k->post_load = kvm_ioapic_put; + dc->reset = kvm_ioapic_reset; + dc->props = kvm_ioapic_properties; +} + +static const TypeInfo kvm_ioapic_info = { + .name = "kvm-ioapic", + .parent = TYPE_IOAPIC_COMMON, + .instance_size = sizeof(KVMIOAPICState), + .class_init = kvm_ioapic_class_init, +}; + +static void kvm_ioapic_register_types(void) +{ + type_register_static(&kvm_ioapic_info); +} + +type_init(kvm_ioapic_register_types) diff --git a/qemu/hw/i386/kvm/pci-assign.c b/qemu/hw/i386/kvm/pci-assign.c new file mode 100644 index 000000000..74d22f4fd --- /dev/null +++ b/qemu/hw/i386/kvm/pci-assign.c @@ -0,0 +1,1968 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * + * Assign a PCI device from the host to a guest VM. + * + * This implementation uses the classic device assignment interface of KVM + * and is only available on x86 hosts. It is expected to be obsoleted by VFIO + * based device assignment. + * + * Adapted for KVM (qemu-kvm) by Qumranet. QEMU version was based on qemu-kvm + * revision 4144fe9d48. See its repository for the history. + * + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ +#include <stdio.h> +#include <unistd.h> +#include <sys/io.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "qemu/error-report.h" +#include "ui/console.h" +#include "hw/loader.h" +#include "monitor/monitor.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "kvm_i386.h" + +#define MSIX_PAGE_SIZE 0x1000 + +/* From linux/ioport.h */ +#define IORESOURCE_IO 0x00000100 /* Resource type */ +#define IORESOURCE_MEM 0x00000200 +#define IORESOURCE_IRQ 0x00000400 +#define IORESOURCE_DMA 0x00000800 +#define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ +#define IORESOURCE_MEM_64 0x00100000 + +//#define DEVICE_ASSIGNMENT_DEBUG + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ + } while (0) +#else +#define DEBUG(fmt, ...) +#endif + +typedef struct PCIRegion { + int type; /* Memory or port I/O */ + int valid; + uint64_t base_addr; + uint64_t size; /* size of the region */ + int resource_fd; +} PCIRegion; + +typedef struct PCIDevRegions { + uint8_t bus, dev, func; /* Bus inside domain, device and function */ + int irq; /* IRQ number */ + uint16_t region_number; /* number of active regions */ + + /* Port I/O or MMIO Regions */ + PCIRegion regions[PCI_NUM_REGIONS - 1]; + int config_fd; +} PCIDevRegions; + +typedef struct AssignedDevRegion { + MemoryRegion container; + MemoryRegion real_iomem; + union { + uint8_t *r_virtbase; /* mmapped access address for memory regions */ + uint32_t r_baseport; /* the base guest port for I/O regions */ + } u; + pcibus_t e_size; /* emulated size of region in bytes */ + pcibus_t r_size; /* real size of region in bytes */ + PCIRegion *region; +} AssignedDevRegion; + +#define ASSIGNED_DEVICE_PREFER_MSI_BIT 0 +#define ASSIGNED_DEVICE_SHARE_INTX_BIT 1 + +#define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT) +#define ASSIGNED_DEVICE_SHARE_INTX_MASK (1 << ASSIGNED_DEVICE_SHARE_INTX_BIT) + +typedef struct MSIXTableEntry { + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data; + uint32_t ctrl; +} MSIXTableEntry; + +typedef enum AssignedIRQType { + ASSIGNED_IRQ_NONE = 0, + ASSIGNED_IRQ_INTX_HOST_INTX, + ASSIGNED_IRQ_INTX_HOST_MSI, + ASSIGNED_IRQ_MSI, + ASSIGNED_IRQ_MSIX +} AssignedIRQType; + +typedef struct AssignedDevice { + PCIDevice dev; + PCIHostDeviceAddress host; + uint32_t dev_id; + uint32_t features; + int intpin; + AssignedDevRegion v_addrs[PCI_NUM_REGIONS - 1]; + PCIDevRegions real_device; + PCIINTxRoute intx_route; + AssignedIRQType assigned_irq_type; + struct { +#define ASSIGNED_DEVICE_CAP_MSI (1 << 0) +#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1) + uint32_t available; +#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0) +#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1) +#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) + uint32_t state; + } cap; + uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE]; + uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE]; + int msi_virq_nr; + int *msi_virq; + MSIXTableEntry *msix_table; + hwaddr msix_table_addr; + uint16_t msix_max; + MemoryRegion mmio; + char *configfd_name; + int32_t bootindex; +} AssignedDevice; + +#define TYPE_PCI_ASSIGN "kvm-pci-assign" +#define PCI_ASSIGN(obj) OBJECT_CHECK(AssignedDevice, (obj), TYPE_PCI_ASSIGN) + +static void assigned_dev_update_irq_routing(PCIDevice *dev); + +static void assigned_dev_load_option_rom(AssignedDevice *dev); + +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); + +static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, + hwaddr addr, int size, + uint64_t *data) +{ + uint64_t val = 0; + int fd = dev_region->region->resource_fd; + + if (data) { + DEBUG("pwrite data=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx + ", addr="TARGET_FMT_plx"\n", *data, size, addr, addr); + if (pwrite(fd, data, size, addr) != size) { + error_report("%s - pwrite failed %s", __func__, strerror(errno)); + } + } else { + if (pread(fd, &val, size, addr) != size) { + error_report("%s - pread failed %s", __func__, strerror(errno)); + val = (1UL << (size * 8)) - 1; + } + DEBUG("pread val=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx + ", addr=" TARGET_FMT_plx "\n", val, size, addr, addr); + } + return val; +} + +static void assigned_dev_ioport_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + assigned_dev_ioport_rw(opaque, addr, size, &data); +} + +static uint64_t assigned_dev_ioport_read(void *opaque, + hwaddr addr, unsigned size) +{ + return assigned_dev_ioport_rw(opaque, addr, size, NULL); +} + +static uint32_t slow_bar_readb(void *opaque, hwaddr addr) +{ + AssignedDevRegion *d = opaque; + uint8_t *in = d->u.r_virtbase + addr; + uint32_t r; + + r = *in; + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static uint32_t slow_bar_readw(void *opaque, hwaddr addr) +{ + AssignedDevRegion *d = opaque; + uint16_t *in = (uint16_t *)(d->u.r_virtbase + addr); + uint32_t r; + + r = *in; + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static uint32_t slow_bar_readl(void *opaque, hwaddr addr) +{ + AssignedDevRegion *d = opaque; + uint32_t *in = (uint32_t *)(d->u.r_virtbase + addr); + uint32_t r; + + r = *in; + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static void slow_bar_writeb(void *opaque, hwaddr addr, uint32_t val) +{ + AssignedDevRegion *d = opaque; + uint8_t *out = d->u.r_virtbase + addr; + + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val); + *out = val; +} + +static void slow_bar_writew(void *opaque, hwaddr addr, uint32_t val) +{ + AssignedDevRegion *d = opaque; + uint16_t *out = (uint16_t *)(d->u.r_virtbase + addr); + + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val); + *out = val; +} + +static void slow_bar_writel(void *opaque, hwaddr addr, uint32_t val) +{ + AssignedDevRegion *d = opaque; + uint32_t *out = (uint32_t *)(d->u.r_virtbase + addr); + + DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val); + *out = val; +} + +static const MemoryRegionOps slow_bar_ops = { + .old_mmio = { + .read = { slow_bar_readb, slow_bar_readw, slow_bar_readl, }, + .write = { slow_bar_writeb, slow_bar_writew, slow_bar_writel, }, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num, + pcibus_t e_size) +{ + AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + PCIRegion *real_region = &r_dev->real_device.regions[region_num]; + + if (e_size > 0) { + memory_region_init(®ion->container, OBJECT(pci_dev), + "assigned-dev-container", e_size); + memory_region_add_subregion(®ion->container, 0, ®ion->real_iomem); + + /* deal with MSI-X MMIO page */ + if (real_region->base_addr <= r_dev->msix_table_addr && + real_region->base_addr + real_region->size > + r_dev->msix_table_addr) { + uint64_t offset = r_dev->msix_table_addr - real_region->base_addr; + + memory_region_add_subregion_overlap(®ion->container, + offset, + &r_dev->mmio, + 1); + } + } +} + +static const MemoryRegionOps assigned_dev_ioport_ops = { + .read = assigned_dev_ioport_read, + .write = assigned_dev_ioport_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num, + pcibus_t size) +{ + AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + + region->e_size = size; + memory_region_init(®ion->container, OBJECT(pci_dev), + "assigned-dev-container", size); + memory_region_init_io(®ion->real_iomem, OBJECT(pci_dev), + &assigned_dev_ioport_ops, r_dev->v_addrs + region_num, + "assigned-dev-iomem", size); + memory_region_add_subregion(®ion->container, 0, ®ion->real_iomem); +} + +static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len) +{ + AssignedDevice *pci_dev = PCI_ASSIGN(d); + uint32_t val; + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pread(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { + goto again; + } + + hw_error("pci read failed, ret = %zd errno = %d\n", ret, errno); + } + + return val; +} + +static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) +{ + return (uint8_t)assigned_dev_pci_read(d, pos, 1); +} + +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ + AssignedDevice *pci_dev = PCI_ASSIGN(d); + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { + goto again; + } + + hw_error("pci write failed, ret = %zd errno = %d\n", ret, errno); + } +} + +static void assigned_dev_emulate_config_read(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_read + offset, 0xff, len); +} + +static void assigned_dev_direct_config_read(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_read + offset, 0, len); +} + +static void assigned_dev_direct_config_write(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_write + offset, 0, len); +} + +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) +{ + int id; + int max_cap = 48; + int pos = start ? start : PCI_CAPABILITY_LIST; + int status; + + status = assigned_dev_pci_read_byte(d, PCI_STATUS); + if ((status & PCI_STATUS_CAP_LIST) == 0) { + return 0; + } + + while (max_cap--) { + pos = assigned_dev_pci_read_byte(d, pos); + if (pos < 0x40) { + break; + } + + pos &= ~3; + id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID); + + if (id == 0xff) { + break; + } + if (id == cap) { + return pos; + } + + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static void assigned_dev_register_regions(PCIRegion *io_regions, + unsigned long regions_num, + AssignedDevice *pci_dev, + Error **errp) +{ + uint32_t i; + PCIRegion *cur_region = io_regions; + + for (i = 0; i < regions_num; i++, cur_region++) { + if (!cur_region->valid) { + continue; + } + + /* handle memory io regions */ + if (cur_region->type & IORESOURCE_MEM) { + int t = PCI_BASE_ADDRESS_SPACE_MEMORY; + if (cur_region->type & IORESOURCE_PREFETCH) { + t |= PCI_BASE_ADDRESS_MEM_PREFETCH; + } + if (cur_region->type & IORESOURCE_MEM_64) { + t |= PCI_BASE_ADDRESS_MEM_TYPE_64; + } + + /* map physical memory */ + pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size, + PROT_WRITE | PROT_READ, + MAP_SHARED, + cur_region->resource_fd, + (off_t)0); + + if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) { + pci_dev->v_addrs[i].u.r_virtbase = NULL; + error_setg_errno(errp, errno, "Couldn't mmap 0x%" PRIx64 "!", + cur_region->base_addr); + return; + } + + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + /* add offset */ + pci_dev->v_addrs[i].u.r_virtbase += + (cur_region->base_addr & 0xFFF); + + if (cur_region->size & 0xFFF) { + error_report("PCI region %d at address 0x%" PRIx64 " has " + "size 0x%" PRIx64 ", which is not a multiple of " + "4K. You might experience some performance hit " + "due to that.", + i, cur_region->base_addr, cur_region->size); + memory_region_init_io(&pci_dev->v_addrs[i].real_iomem, + OBJECT(pci_dev), &slow_bar_ops, + &pci_dev->v_addrs[i], + "assigned-dev-slow-bar", + cur_region->size); + } else { + void *virtbase = pci_dev->v_addrs[i].u.r_virtbase; + char name[32]; + snprintf(name, sizeof(name), "%s.bar%d", + object_get_typename(OBJECT(pci_dev)), i); + memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem, + OBJECT(pci_dev), name, + cur_region->size, virtbase); + vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem, + &pci_dev->dev.qdev); + } + + assigned_dev_iomem_setup(&pci_dev->dev, i, cur_region->size); + pci_register_bar((PCIDevice *) pci_dev, i, t, + &pci_dev->v_addrs[i].container); + continue; + } else { + /* handle port io regions */ + uint32_t val; + int ret; + + /* Test kernel support for ioport resource read/write. Old + * kernels return EIO. New kernels only allow 1/2/4 byte reads + * so should return EINVAL for a 3 byte read */ + ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0); + if (ret >= 0) { + error_report("Unexpected return from I/O port read: %d", ret); + abort(); + } else if (errno != EINVAL) { + error_report("Kernel doesn't support ioport resource " + "access, hiding this region."); + close(pci_dev->v_addrs[i].region->resource_fd); + cur_region->valid = 0; + continue; + } + + pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + assigned_dev_ioport_setup(&pci_dev->dev, i, cur_region->size); + pci_register_bar((PCIDevice *) pci_dev, i, + PCI_BASE_ADDRESS_SPACE_IO, + &pci_dev->v_addrs[i].container); + } + } + + /* success */ +} + +static void get_real_id(const char *devpath, const char *idname, uint16_t *val, + Error **errp) +{ + FILE *f; + char name[128]; + long id; + + snprintf(name, sizeof(name), "%s%s", devpath, idname); + f = fopen(name, "r"); + if (f == NULL) { + error_setg_file_open(errp, errno, name); + return; + } + if (fscanf(f, "%li\n", &id) == 1) { + *val = id; + } else { + error_setg(errp, "Failed to parse contents of '%s'", name); + } + fclose(f); +} + +static void get_real_vendor_id(const char *devpath, uint16_t *val, + Error **errp) +{ + get_real_id(devpath, "vendor", val, errp); +} + +static void get_real_device_id(const char *devpath, uint16_t *val, + Error **errp) +{ + get_real_id(devpath, "device", val, errp); +} + +static void get_real_device(AssignedDevice *pci_dev, Error **errp) +{ + char dir[128], name[128]; + int fd, r = 0; + FILE *f; + uint64_t start, end, size, flags; + uint16_t id; + PCIRegion *rp; + PCIDevRegions *dev = &pci_dev->real_device; + Error *local_err = NULL; + + dev->region_number = 0; + + snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", + pci_dev->host.domain, pci_dev->host.bus, + pci_dev->host.slot, pci_dev->host.function); + + snprintf(name, sizeof(name), "%sconfig", dir); + + if (pci_dev->configfd_name && *pci_dev->configfd_name) { + dev->config_fd = monitor_fd_param(cur_mon, pci_dev->configfd_name, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } else { + dev->config_fd = open(name, O_RDWR); + + if (dev->config_fd == -1) { + error_setg_file_open(errp, errno, name); + return; + } + } +again: + r = read(dev->config_fd, pci_dev->dev.config, + pci_config_size(&pci_dev->dev)); + if (r < 0) { + if (errno == EINTR || errno == EAGAIN) { + goto again; + } + error_setg_errno(errp, errno, "read(\"%s\")", + (pci_dev->configfd_name && *pci_dev->configfd_name) ? + pci_dev->configfd_name : name); + return; + } + + /* Restore or clear multifunction, this is always controlled by qemu */ + if (pci_dev->dev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + pci_dev->dev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; + } else { + pci_dev->dev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; + } + + /* Clear host resource mapping info. If we choose not to register a + * BAR, such as might be the case with the option ROM, we can get + * confusing, unwritable, residual addresses from the host here. */ + memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24); + memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4); + + snprintf(name, sizeof(name), "%sresource", dir); + + f = fopen(name, "r"); + if (f == NULL) { + error_setg_file_open(errp, errno, name); + return; + } + + for (r = 0; r < PCI_ROM_SLOT; r++) { + if (fscanf(f, "%" SCNi64 " %" SCNi64 " %" SCNi64 "\n", + &start, &end, &flags) != 3) { + break; + } + + rp = dev->regions + r; + rp->valid = 0; + rp->resource_fd = -1; + size = end - start + 1; + flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH + | IORESOURCE_MEM_64; + if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) { + continue; + } + if (flags & IORESOURCE_MEM) { + flags &= ~IORESOURCE_IO; + } else { + flags &= ~IORESOURCE_PREFETCH; + } + snprintf(name, sizeof(name), "%sresource%d", dir, r); + fd = open(name, O_RDWR); + if (fd == -1) { + continue; + } + rp->resource_fd = fd; + + rp->type = flags; + rp->valid = 1; + rp->base_addr = start; + rp->size = size; + pci_dev->v_addrs[r].region = rp; + DEBUG("region %d size %" PRIu64 " start 0x%" PRIx64 + " type %d resource_fd %d\n", + r, rp->size, start, rp->type, rp->resource_fd); + } + + fclose(f); + + /* read and fill vendor ID */ + get_real_vendor_id(dir, &id, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + pci_dev->dev.config[0] = id & 0xff; + pci_dev->dev.config[1] = (id & 0xff00) >> 8; + + /* read and fill device ID */ + get_real_device_id(dir, &id, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + pci_dev->dev.config[2] = id & 0xff; + pci_dev->dev.config[3] = (id & 0xff00) >> 8; + + pci_word_test_and_clear_mask(pci_dev->emulate_config_write + PCI_COMMAND, + PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE); + + dev->region_number = r; +} + +static void free_msi_virqs(AssignedDevice *dev) +{ + int i; + + for (i = 0; i < dev->msi_virq_nr; i++) { + if (dev->msi_virq[i] >= 0) { + kvm_irqchip_release_virq(kvm_state, dev->msi_virq[i]); + dev->msi_virq[i] = -1; + } + } + g_free(dev->msi_virq); + dev->msi_virq = NULL; + dev->msi_virq_nr = 0; +} + +static void free_assigned_device(AssignedDevice *dev) +{ + int i; + + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { + assigned_dev_unregister_msix_mmio(dev); + } + for (i = 0; i < dev->real_device.region_number; i++) { + PCIRegion *pci_region = &dev->real_device.regions[i]; + AssignedDevRegion *region = &dev->v_addrs[i]; + + if (!pci_region->valid) { + continue; + } + if (pci_region->type & IORESOURCE_IO) { + if (region->u.r_baseport) { + memory_region_del_subregion(®ion->container, + ®ion->real_iomem); + } + } else if (pci_region->type & IORESOURCE_MEM) { + if (region->u.r_virtbase) { + memory_region_del_subregion(®ion->container, + ®ion->real_iomem); + + /* Remove MSI-X table subregion */ + if (pci_region->base_addr <= dev->msix_table_addr && + pci_region->base_addr + pci_region->size > + dev->msix_table_addr) { + memory_region_del_subregion(®ion->container, + &dev->mmio); + } + if (munmap(region->u.r_virtbase, + (pci_region->size + 0xFFF) & 0xFFFFF000)) { + error_report("Failed to unmap assigned device region: %s", + strerror(errno)); + } + } + } + if (pci_region->resource_fd >= 0) { + close(pci_region->resource_fd); + } + } + + if (dev->real_device.config_fd >= 0) { + close(dev->real_device.config_fd); + } + + free_msi_virqs(dev); +} + +/* This function tries to determine the cause of the PCI assignment failure. It + * always returns the cause as a dynamically allocated, human readable string. + * If the function fails to determine the cause for any internal reason, then + * the returned string will state that fact. + */ +static char *assign_failed_examine(const AssignedDevice *dev) +{ + char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; + uint16_t vendor_id, device_id; + int r; + Error *local_err = NULL; + + snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", + dev->host.domain, dev->host.bus, dev->host.slot, + dev->host.function); + + snprintf(name, sizeof(name), "%sdriver", dir); + + r = readlink(name, driver, sizeof(driver)); + if ((r <= 0) || r >= sizeof(driver)) { + goto fail; + } + + driver[r] = 0; + ns = strrchr(driver, '/'); + if (!ns) { + goto fail; + } + + ns++; + + if ((get_real_vendor_id(dir, &vendor_id, &local_err), local_err) || + (get_real_device_id(dir, &device_id, &local_err), local_err)) { + /* We're already analyzing an assignment error, so we suppress this + * one just like the others above. + */ + error_free(local_err); + goto fail; + } + + return g_strdup_printf( + "*** The driver '%s' is occupying your device %04x:%02x:%02x.%x.\n" + "***\n" + "*** You can try the following commands to free it:\n" + "***\n" + "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/new_id\n" + "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/%s/unbind\n" + "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + "pci-stub/bind\n" + "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/remove_id\n" + "***", + ns, dev->host.domain, dev->host.bus, dev->host.slot, + dev->host.function, vendor_id, device_id, + dev->host.domain, dev->host.bus, dev->host.slot, dev->host.function, + ns, dev->host.domain, dev->host.bus, dev->host.slot, + dev->host.function, vendor_id, device_id); + +fail: + return g_strdup("Couldn't find out why."); +} + +static void assign_device(AssignedDevice *dev, Error **errp) +{ + uint32_t flags = KVM_DEV_ASSIGN_ENABLE_IOMMU; + int r; + + /* Only pass non-zero PCI segment to capable module */ + if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && + dev->host.domain) { + error_setg(errp, "Can't assign device inside non-zero PCI segment " + "as this KVM module doesn't support it."); + return; + } + + if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) { + error_setg(errp, "No IOMMU found. Unable to assign device \"%s\"", + dev->dev.qdev.id); + return; + } + + if (dev->features & ASSIGNED_DEVICE_SHARE_INTX_MASK && + kvm_has_intx_set_mask()) { + flags |= KVM_DEV_ASSIGN_PCI_2_3; + } + + r = kvm_device_pci_assign(kvm_state, &dev->host, flags, &dev->dev_id); + if (r < 0) { + switch (r) { + case -EBUSY: { + char *cause; + + cause = assign_failed_examine(dev); + error_setg_errno(errp, -r, "Failed to assign device \"%s\"\n%s", + dev->dev.qdev.id, cause); + g_free(cause); + break; + } + default: + error_setg_errno(errp, -r, "Failed to assign device \"%s\"", + dev->dev.qdev.id); + break; + } + } +} + +static void verify_irqchip_in_kernel(Error **errp) +{ + if (kvm_irqchip_in_kernel()) { + return; + } + error_setg(errp, "pci-assign requires KVM with in-kernel irqchip enabled"); +} + +static int assign_intx(AssignedDevice *dev, Error **errp) +{ + AssignedIRQType new_type; + PCIINTxRoute intx_route; + bool intx_host_msi; + int r; + Error *local_err = NULL; + + /* Interrupt PIN 0 means don't use INTx */ + if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) { + pci_device_set_intx_routing_notifier(&dev->dev, NULL); + return 0; + } + + verify_irqchip_in_kernel(&local_err); + if (local_err) { + error_propagate(errp, local_err); + return -ENOTSUP; + } + + pci_device_set_intx_routing_notifier(&dev->dev, + assigned_dev_update_irq_routing); + + intx_route = pci_device_route_intx_to_irq(&dev->dev, dev->intpin); + assert(intx_route.mode != PCI_INTX_INVERTED); + + if (!pci_intx_route_changed(&dev->intx_route, &intx_route)) { + return 0; + } + + switch (dev->assigned_irq_type) { + case ASSIGNED_IRQ_INTX_HOST_INTX: + case ASSIGNED_IRQ_INTX_HOST_MSI: + intx_host_msi = dev->assigned_irq_type == ASSIGNED_IRQ_INTX_HOST_MSI; + r = kvm_device_intx_deassign(kvm_state, dev->dev_id, intx_host_msi); + break; + case ASSIGNED_IRQ_MSI: + r = kvm_device_msi_deassign(kvm_state, dev->dev_id); + break; + case ASSIGNED_IRQ_MSIX: + r = kvm_device_msix_deassign(kvm_state, dev->dev_id); + break; + default: + r = 0; + break; + } + if (r) { + perror("assign_intx: deassignment of previous interrupt failed"); + } + dev->assigned_irq_type = ASSIGNED_IRQ_NONE; + + if (intx_route.mode == PCI_INTX_DISABLED) { + dev->intx_route = intx_route; + return 0; + } + +retry: + if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK && + dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { + intx_host_msi = true; + new_type = ASSIGNED_IRQ_INTX_HOST_MSI; + } else { + intx_host_msi = false; + new_type = ASSIGNED_IRQ_INTX_HOST_INTX; + } + + r = kvm_device_intx_assign(kvm_state, dev->dev_id, intx_host_msi, + intx_route.irq); + if (r < 0) { + if (r == -EIO && !(dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK) && + dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { + /* Retry with host-side MSI. There might be an IRQ conflict and + * either the kernel or the device doesn't support sharing. */ + error_report("Host-side INTx sharing not supported, " + "using MSI instead"); + error_printf("Some devices do not work properly in this mode.\n"); + dev->features |= ASSIGNED_DEVICE_PREFER_MSI_MASK; + goto retry; + } + error_setg_errno(errp, -r, + "Failed to assign irq for \"%s\"\n" + "Perhaps you are assigning a device " + "that shares an IRQ with another device?", + dev->dev.qdev.id); + return r; + } + + dev->intx_route = intx_route; + dev->assigned_irq_type = new_type; + return r; +} + +static void deassign_device(AssignedDevice *dev) +{ + int r; + + r = kvm_device_pci_deassign(kvm_state, dev->dev_id); + assert(r == 0); +} + +/* The pci config space got updated. Check if irq numbers have changed + * for our devices + */ +static void assigned_dev_update_irq_routing(PCIDevice *dev) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(dev); + Error *err = NULL; + int r; + + r = assign_intx(assigned_dev, &err); + if (r < 0) { + error_report_err(err); + err = NULL; + qdev_unplug(&dev->qdev, &err); + assert(!err); + } +} + +static void assigned_dev_update_msi(PCIDevice *pci_dev) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); + uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_FLAGS); + int r; + + /* Some guests gratuitously disable MSI even if they're not using it, + * try to catch this by only deassigning irqs if the guest is using + * MSI or intends to start. */ + if (assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSI || + (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { + r = kvm_device_msi_deassign(kvm_state, assigned_dev->dev_id); + /* -ENXIO means no assigned irq */ + if (r && r != -ENXIO) { + perror("assigned_dev_update_msi: deassign irq"); + } + + free_msi_virqs(assigned_dev); + + assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; + pci_device_set_intx_routing_notifier(pci_dev, NULL); + } + + if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { + MSIMessage msg = msi_get_message(pci_dev, 0); + int virq; + + virq = kvm_irqchip_add_msi_route(kvm_state, msg); + if (virq < 0) { + perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route"); + return; + } + + assigned_dev->msi_virq = g_malloc(sizeof(*assigned_dev->msi_virq)); + assigned_dev->msi_virq_nr = 1; + assigned_dev->msi_virq[0] = virq; + if (kvm_device_msi_assign(kvm_state, assigned_dev->dev_id, virq) < 0) { + perror("assigned_dev_update_msi: kvm_device_msi_assign"); + } + + assigned_dev->intx_route.mode = PCI_INTX_DISABLED; + assigned_dev->intx_route.irq = -1; + assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI; + } else { + Error *local_err = NULL; + + assign_intx(assigned_dev, &local_err); + if (local_err) { + error_report_err(local_err); + } + } +} + +static void assigned_dev_update_msi_msg(PCIDevice *pci_dev) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); + uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_FLAGS); + + if (assigned_dev->assigned_irq_type != ASSIGNED_IRQ_MSI || + !(ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { + return; + } + + kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0], + msi_get_message(pci_dev, 0)); +} + +static bool assigned_dev_msix_masked(MSIXTableEntry *entry) +{ + return (entry->ctrl & cpu_to_le32(0x1)) != 0; +} + +/* + * When MSI-X is first enabled the vector table typically has all the + * vectors masked, so we can't use that as the obvious test to figure out + * how many vectors to initially enable. Instead we look at the data field + * because this is what worked for pci-assign for a long time. This makes + * sure the physical MSI-X state tracks the guest's view, which is important + * for some VF/PF and PF/fw communication channels. + */ +static bool assigned_dev_msix_skipped(MSIXTableEntry *entry) +{ + return !entry->data; +} + +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) +{ + AssignedDevice *adev = PCI_ASSIGN(pci_dev); + uint16_t entries_nr = 0; + int i, r = 0; + MSIXTableEntry *entry = adev->msix_table; + MSIMessage msg; + + /* Get the usable entry number for allocating */ + for (i = 0; i < adev->msix_max; i++, entry++) { + if (assigned_dev_msix_skipped(entry)) { + continue; + } + entries_nr++; + } + + DEBUG("MSI-X entries: %d\n", entries_nr); + + /* It's valid to enable MSI-X with all entries masked */ + if (!entries_nr) { + return 0; + } + + r = kvm_device_msix_init_vectors(kvm_state, adev->dev_id, entries_nr); + if (r != 0) { + error_report("fail to set MSI-X entry number for MSIX! %s", + strerror(-r)); + return r; + } + + free_msi_virqs(adev); + + adev->msi_virq_nr = adev->msix_max; + adev->msi_virq = g_malloc(adev->msix_max * sizeof(*adev->msi_virq)); + + entry = adev->msix_table; + for (i = 0; i < adev->msix_max; i++, entry++) { + adev->msi_virq[i] = -1; + + if (assigned_dev_msix_skipped(entry)) { + continue; + } + + msg.address = entry->addr_lo | ((uint64_t)entry->addr_hi << 32); + msg.data = entry->data; + r = kvm_irqchip_add_msi_route(kvm_state, msg); + if (r < 0) { + return r; + } + adev->msi_virq[i] = r; + + DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i, + r, entry->addr_hi, entry->addr_lo, entry->data); + + r = kvm_device_msix_set_vector(kvm_state, adev->dev_id, i, + adev->msi_virq[i]); + if (r) { + error_report("fail to set MSI-X entry! %s", strerror(-r)); + break; + } + } + + return r; +} + +static void assigned_dev_update_msix(PCIDevice *pci_dev) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); + uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap + + PCI_MSIX_FLAGS); + int r; + + /* Some guests gratuitously disable MSIX even if they're not using it, + * try to catch this by only deassigning irqs if the guest is using + * MSIX or intends to start. */ + if ((assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSIX) || + (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) { + r = kvm_device_msix_deassign(kvm_state, assigned_dev->dev_id); + /* -ENXIO means no assigned irq */ + if (r && r != -ENXIO) { + perror("assigned_dev_update_msix: deassign irq"); + } + + free_msi_virqs(assigned_dev); + + assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; + pci_device_set_intx_routing_notifier(pci_dev, NULL); + } + + if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) { + if (assigned_dev_update_msix_mmio(pci_dev) < 0) { + perror("assigned_dev_update_msix_mmio"); + return; + } + + if (assigned_dev->msi_virq_nr > 0) { + if (kvm_device_msix_assign(kvm_state, assigned_dev->dev_id) < 0) { + perror("assigned_dev_enable_msix: assign irq"); + return; + } + } + assigned_dev->intx_route.mode = PCI_INTX_DISABLED; + assigned_dev->intx_route.irq = -1; + assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSIX; + } else { + Error *local_err = NULL; + + assign_intx(assigned_dev, &local_err); + if (local_err) { + error_report_err(local_err); + } + } +} + +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); + uint32_t virt_val = pci_default_read_config(pci_dev, address, len); + uint32_t real_val, emulate_mask, full_emulation_mask; + + emulate_mask = 0; + memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); + emulate_mask = le32_to_cpu(emulate_mask); + + full_emulation_mask = 0xffffffff >> (32 - len * 8); + + if (emulate_mask != full_emulation_mask) { + real_val = assigned_dev_pci_read(pci_dev, address, len); + return (virt_val & emulate_mask) | (real_val & ~emulate_mask); + } else { + return virt_val; + } +} + +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); + uint16_t old_cmd = pci_get_word(pci_dev->config + PCI_COMMAND); + uint32_t emulate_mask, full_emulation_mask; + int ret; + + pci_default_write_config(pci_dev, address, val, len); + + if (kvm_has_intx_set_mask() && + range_covers_byte(address, len, PCI_COMMAND + 1)) { + bool intx_masked = (pci_get_word(pci_dev->config + PCI_COMMAND) & + PCI_COMMAND_INTX_DISABLE); + + if (intx_masked != !!(old_cmd & PCI_COMMAND_INTX_DISABLE)) { + ret = kvm_device_intx_set_mask(kvm_state, assigned_dev->dev_id, + intx_masked); + if (ret) { + perror("assigned_dev_pci_write_config: set intx mask"); + } + } + } + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { + if (range_covers_byte(address, len, + pci_dev->msi_cap + PCI_MSI_FLAGS)) { + assigned_dev_update_msi(pci_dev); + } else if (ranges_overlap(address, len, /* 32bit MSI only */ + pci_dev->msi_cap + PCI_MSI_ADDRESS_LO, 6)) { + assigned_dev_update_msi_msg(pci_dev); + } + } + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { + if (range_covers_byte(address, len, + pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { + assigned_dev_update_msix(pci_dev); + } + } + + emulate_mask = 0; + memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len); + emulate_mask = le32_to_cpu(emulate_mask); + + full_emulation_mask = 0xffffffff >> (32 - len * 8); + + if (emulate_mask != full_emulation_mask) { + if (emulate_mask) { + val &= ~emulate_mask; + val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask; + } + assigned_dev_pci_write(pci_dev, address, val, len); + } +} + +static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset, + uint32_t len) +{ + assigned_dev_direct_config_read(dev, offset, len); + assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1); +} + +static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) +{ + AssignedDevice *dev = PCI_ASSIGN(pci_dev); + PCIRegion *pci_region = dev->real_device.regions; + int ret, pos; + Error *local_err = NULL; + + /* Clear initial capabilities pointer and status copied from hw */ + pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0); + pci_set_word(pci_dev->config + PCI_STATUS, + pci_get_word(pci_dev->config + PCI_STATUS) & + ~PCI_STATUS_CAP_LIST); + + /* Expose MSI capability + * MSI capability is the 1st capability in capability config */ + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0); + if (pos != 0 && kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) { + verify_irqchip_in_kernel(&local_err); + if (local_err) { + error_propagate(errp, local_err); + return -ENOTSUP; + } + dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; + /* Only 32-bit/no-mask currently supported */ + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSI, pos, 10, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + pci_dev->msi_cap = pos; + + pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS, + pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) & + PCI_MSI_FLAGS_QMASK); + pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0); + pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0); + + /* Set writable fields */ + pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS, + PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); + pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc); + pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff); + } + /* Expose MSI-X capability */ + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0); + if (pos != 0 && kvm_device_msix_supported(kvm_state)) { + int bar_nr; + uint32_t msix_table_entry; + uint16_t msix_max; + + verify_irqchip_in_kernel(&local_err); + if (local_err) { + error_propagate(errp, local_err); + return -ENOTSUP; + } + dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSIX, pos, 12, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + pci_dev->msix_cap = pos; + + msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) & + PCI_MSIX_FLAGS_QSIZE) + 1; + msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV); + pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1); + + /* Only enable and function mask bits are writable */ + pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS, + PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); + + msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE); + bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK; + msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK; + dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + dev->msix_max = msix_max; + } + + /* Minimal PM support, nothing writable, device appears to NAK changes */ + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0); + if (pos) { + uint16_t pmc; + + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF); + + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); + + /* assign_device will bring the device up to D0, so we don't need + * to worry about doing that ourselves here. */ + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RESET); + + pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); + pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); + } + + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0); + if (pos) { + uint8_t version, size = 0; + uint16_t type, devctl, lnksta; + uint32_t devcap, lnkcap; + + version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS); + version &= PCI_EXP_FLAGS_VERS; + if (version == 1) { + size = 0x14; + } else if (version == 2) { + /* + * Check for non-std size, accept reduced size to 0x34, + * which is what bcm5761 implemented, violating the + * PCIe v3.0 spec that regs should exist and be read as 0, + * not optionally provided and shorten the struct size. + */ + size = MIN(0x3c, PCI_CONFIG_SPACE_SIZE - pos); + if (size < 0x34) { + error_setg(errp, "Invalid size PCIe cap-id 0x%x", + PCI_CAP_ID_EXP); + return -EINVAL; + } else if (size != 0x3c) { + error_report("WARNING, %s: PCIe cap-id 0x%x has " + "non-standard size 0x%x; std size should be 0x3c", + __func__, PCI_CAP_ID_EXP, size); + } + } else if (version == 0) { + uint16_t vid, did; + vid = pci_get_word(pci_dev->config + PCI_VENDOR_ID); + did = pci_get_word(pci_dev->config + PCI_DEVICE_ID); + if (vid == PCI_VENDOR_ID_INTEL && did == 0x10ed) { + /* + * quirk for Intel 82599 VF with invalid PCIe capability + * version, should really be version 2 (same as PF) + */ + size = 0x3c; + } + } + + if (size == 0) { + error_setg(errp, "Unsupported PCI express capability version %d", + version); + return -EINVAL; + } + + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_EXP, pos, size, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + assigned_dev_setup_cap_read(dev, pos, size); + + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); + type = (type & PCI_EXP_FLAGS_TYPE) >> 4; + if (type != PCI_EXP_TYPE_ENDPOINT && + type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { + error_setg(errp, "Device assignment only supports endpoint " + "assignment, device type %d", type); + return -EINVAL; + } + + /* capabilities, pass existing read-only copy + * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ + + /* device capabilities: hide FLR */ + devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); + devcap &= ~PCI_EXP_DEVCAP_FLR; + pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); + + /* device control: clear all error reporting enable bits, leaving + * only a few host values. Note, these are + * all writable, but not passed to hw. + */ + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); + + /* Clear device status */ + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); + + /* Link capabilities, expose links and latencues, clear reporting */ + lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP); + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | + PCI_EXP_LNKCAP_L1EL); + pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); + + /* Link control, pass existing read-only copy. Should be writable? */ + + /* Link status, only expose current speed and width */ + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); + + if (version >= 2) { + /* Slot capabilities, control, status - not needed for endpoints */ + pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); + + /* Root control, capabilities, status - not needed for endpoints */ + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); + pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); + + /* Device capabilities/control 2, pass existing read-only copy */ + /* Link control 2, pass existing read-only copy */ + } + } + + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0); + if (pos) { + uint16_t cmd; + uint32_t status; + + /* Only expose the minimum, 8 byte capability */ + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_PCIX, pos, 8, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + assigned_dev_setup_cap_read(dev, pos, 8); + + /* Command register, clear upper bits, including extended modes */ + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | + PCI_X_CMD_MAX_SPLIT); + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); + + /* Status register, update with emulated PCI bus location, clear + * error bits, leave the rest. */ + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | + PCI_X_STATUS_SPL_ERR); + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); + } + + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0); + if (pos) { + /* Direct R/W passthrough */ + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VPD, pos, 8, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + assigned_dev_setup_cap_read(dev, pos, 8); + + /* direct write for cap content */ + assigned_dev_direct_config_write(dev, pos + 2, 6); + } + + /* Devices can have multiple vendor capabilities, get them all */ + for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); + pos += PCI_CAP_LIST_NEXT) { + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); + /* Direct R/W passthrough */ + ret = pci_add_capability2(pci_dev, PCI_CAP_ID_VNDR, pos, len, + &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + assigned_dev_setup_cap_read(dev, pos, len); + + /* direct write for cap content */ + assigned_dev_direct_config_write(dev, pos + 2, len - 2); + } + + /* If real and virtual capability list status bits differ, virtualize the + * access. */ + if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != + (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) { + dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST; + } + + return 0; +} + +static uint64_t +assigned_dev_msix_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + AssignedDevice *adev = opaque; + uint64_t val; + + memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size); + + return val; +} + +static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + AssignedDevice *adev = opaque; + PCIDevice *pdev = &adev->dev; + uint16_t ctrl; + MSIXTableEntry orig; + int i = addr >> 4; + + if (i >= adev->msix_max) { + return; /* Drop write */ + } + + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS); + + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val); + + if (ctrl & PCI_MSIX_FLAGS_ENABLE) { + orig = adev->msix_table[i]; + } + + memcpy((uint8_t *)adev->msix_table + addr, &val, size); + + if (ctrl & PCI_MSIX_FLAGS_ENABLE) { + MSIXTableEntry *entry = &adev->msix_table[i]; + + if (!assigned_dev_msix_masked(&orig) && + assigned_dev_msix_masked(entry)) { + /* + * Vector masked, disable it + * + * XXX It's not clear if we can or should actually attempt + * to mask or disable the interrupt. KVM doesn't have + * support for pending bits and kvm_assign_set_msix_entry + * doesn't modify the device hardware mask. Interrupts + * while masked are simply not injected to the guest, so + * are lost. Can we get away with always injecting an + * interrupt on unmask? + */ + } else if (assigned_dev_msix_masked(&orig) && + !assigned_dev_msix_masked(entry)) { + /* Vector unmasked */ + if (i >= adev->msi_virq_nr || adev->msi_virq[i] < 0) { + /* Previously unassigned vector, start from scratch */ + assigned_dev_update_msix(pdev); + return; + } else { + /* Update an existing, previously masked vector */ + MSIMessage msg; + int ret; + + msg.address = entry->addr_lo | + ((uint64_t)entry->addr_hi << 32); + msg.data = entry->data; + + ret = kvm_irqchip_update_msi_route(kvm_state, + adev->msi_virq[i], msg); + if (ret) { + error_report("Error updating irq routing entry (%d)", ret); + } + } + } + } +} + +static const MemoryRegionOps assigned_dev_msix_mmio_ops = { + .read = assigned_dev_msix_mmio_read, + .write = assigned_dev_msix_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static void assigned_dev_msix_reset(AssignedDevice *dev) +{ + MSIXTableEntry *entry; + int i; + + if (!dev->msix_table) { + return; + } + + memset(dev->msix_table, 0, MSIX_PAGE_SIZE); + + for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) { + entry->ctrl = cpu_to_le32(0x1); /* Masked */ + } +} + +static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) +{ + dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); + if (dev->msix_table == MAP_FAILED) { + error_setg_errno(errp, errno, "failed to allocate msix_table"); + dev->msix_table = NULL; + return; + } + + assigned_dev_msix_reset(dev); + + memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops, + dev, "assigned-dev-msix", MSIX_PAGE_SIZE); +} + +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) +{ + if (!dev->msix_table) { + return; + } + + if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) { + error_report("error unmapping msix_table! %s", strerror(errno)); + } + dev->msix_table = NULL; +} + +static const VMStateDescription vmstate_assigned_device = { + .name = "pci-assign", + .unmigratable = 1, +}; + +static void reset_assigned_device(DeviceState *dev) +{ + PCIDevice *pci_dev = PCI_DEVICE(dev); + AssignedDevice *adev = PCI_ASSIGN(pci_dev); + char reset_file[64]; + const char reset[] = "1"; + int fd, ret; + + /* + * If a guest is reset without being shutdown, MSI/MSI-X can still + * be running. We want to return the device to a known state on + * reset, so disable those here. We especially do not want MSI-X + * enabled since it lives in MMIO space, which is about to get + * disabled. + */ + if (adev->assigned_irq_type == ASSIGNED_IRQ_MSIX) { + uint16_t ctrl = pci_get_word(pci_dev->config + + pci_dev->msix_cap + PCI_MSIX_FLAGS); + + pci_set_word(pci_dev->config + pci_dev->msix_cap + PCI_MSIX_FLAGS, + ctrl & ~PCI_MSIX_FLAGS_ENABLE); + assigned_dev_update_msix(pci_dev); + } else if (adev->assigned_irq_type == ASSIGNED_IRQ_MSI) { + uint8_t ctrl = pci_get_byte(pci_dev->config + + pci_dev->msi_cap + PCI_MSI_FLAGS); + + pci_set_byte(pci_dev->config + pci_dev->msi_cap + PCI_MSI_FLAGS, + ctrl & ~PCI_MSI_FLAGS_ENABLE); + assigned_dev_update_msi(pci_dev); + } + + snprintf(reset_file, sizeof(reset_file), + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset", + adev->host.domain, adev->host.bus, adev->host.slot, + adev->host.function); + + /* + * Issue a device reset via pci-sysfs. Note that we use write(2) here + * and ignore the return value because some kernels have a bug that + * returns 0 rather than bytes written on success, sending us into an + * infinite retry loop using other write mechanisms. + */ + fd = open(reset_file, O_WRONLY); + if (fd != -1) { + ret = write(fd, reset, strlen(reset)); + (void)ret; + close(fd); + } + + /* + * When a 0 is written to the bus master register, the device is logically + * disconnected from the PCI bus. This avoids further DMA transfers. + */ + assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 1); +} + +static void assigned_realize(struct PCIDevice *pci_dev, Error **errp) +{ + AssignedDevice *dev = PCI_ASSIGN(pci_dev); + uint8_t e_intx; + int r; + Error *local_err = NULL; + + if (!kvm_enabled()) { + error_setg(&local_err, "pci-assign requires KVM support"); + goto exit_with_error; + } + + if (!dev->host.domain && !dev->host.bus && !dev->host.slot && + !dev->host.function) { + error_setg(&local_err, "no host device specified"); + goto exit_with_error; + } + + /* + * Set up basic config space access control. Will be further refined during + * device initialization. + */ + assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE); + assigned_dev_direct_config_read(dev, PCI_STATUS, 2); + assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1); + assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3); + assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1); + assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1); + assigned_dev_direct_config_read(dev, PCI_BIST, 1); + assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4); + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2); + assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7); + assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1); + assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1); + memcpy(dev->emulate_config_write, dev->emulate_config_read, + sizeof(dev->emulate_config_read)); + + get_real_device(dev, &local_err); + if (local_err) { + goto out; + } + + if (assigned_device_pci_cap_init(pci_dev, &local_err) < 0) { + goto out; + } + + /* intercept MSI-X entry page in the MMIO */ + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { + assigned_dev_register_msix_mmio(dev, &local_err); + if (local_err) { + goto out; + } + } + + /* handle real device's MMIO/PIO BARs */ + assigned_dev_register_regions(dev->real_device.regions, + dev->real_device.region_number, dev, + &local_err); + if (local_err) { + goto out; + } + + /* handle interrupt routing */ + e_intx = dev->dev.config[PCI_INTERRUPT_PIN] - 1; + dev->intpin = e_intx; + dev->intx_route.mode = PCI_INTX_DISABLED; + dev->intx_route.irq = -1; + + /* assign device to guest */ + assign_device(dev, &local_err); + if (local_err) { + goto out; + } + + /* assign legacy INTx to the device */ + r = assign_intx(dev, &local_err); + if (r < 0) { + goto assigned_out; + } + + assigned_dev_load_option_rom(dev); + + return; + +assigned_out: + deassign_device(dev); + +out: + free_assigned_device(dev); + +exit_with_error: + assert(local_err); + error_propagate(errp, local_err); +} + +static void assigned_exitfn(struct PCIDevice *pci_dev) +{ + AssignedDevice *dev = PCI_ASSIGN(pci_dev); + + deassign_device(dev); + free_assigned_device(dev); +} + +static void assigned_dev_instance_init(Object *obj) +{ + PCIDevice *pci_dev = PCI_DEVICE(obj); + AssignedDevice *d = PCI_ASSIGN(pci_dev); + + device_add_bootindex_property(obj, &d->bootindex, + "bootindex", NULL, + &pci_dev->qdev, NULL); +} + +static Property assigned_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", AssignedDevice, host), + DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features, + ASSIGNED_DEVICE_PREFER_MSI_BIT, false), + DEFINE_PROP_BIT("share_intx", AssignedDevice, features, + ASSIGNED_DEVICE_SHARE_INTX_BIT, true), + DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name), + DEFINE_PROP_END_OF_LIST(), +}; + +static void assign_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = assigned_realize; + k->exit = assigned_exitfn; + k->config_read = assigned_dev_pci_read_config; + k->config_write = assigned_dev_pci_write_config; + dc->props = assigned_dev_properties; + dc->vmsd = &vmstate_assigned_device; + dc->reset = reset_assigned_device; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "KVM-based PCI passthrough"; +} + +static const TypeInfo assign_info = { + .name = TYPE_PCI_ASSIGN, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(AssignedDevice), + .class_init = assign_class_init, + .instance_init = assigned_dev_instance_init, +}; + +static void assign_register_types(void) +{ + type_register_static(&assign_info); +} + +type_init(assign_register_types) + +/* + * Scan the assigned devices for the devices that have an option ROM, and then + * load the corresponding ROM data to RAM. If an error occurs while loading an + * option ROM, we just ignore that option ROM and continue with the next one. + */ +static void assigned_dev_load_option_rom(AssignedDevice *dev) +{ + char name[32], rom_file[64]; + FILE *fp; + uint8_t val; + struct stat st; + void *ptr; + + /* If loading ROM from file, pci handles it */ + if (dev->dev.romfile || !dev->dev.rom_bar) { + return; + } + + snprintf(rom_file, sizeof(rom_file), + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + dev->host.domain, dev->host.bus, dev->host.slot, + dev->host.function); + + if (stat(rom_file, &st)) { + return; + } + + if (access(rom_file, F_OK)) { + error_report("pci-assign: Insufficient privileges for %s", rom_file); + return; + } + + /* Write "1" to the ROM file to enable it */ + fp = fopen(rom_file, "r+"); + if (fp == NULL) { + return; + } + val = 1; + if (fwrite(&val, 1, 1, fp) != 1) { + goto close_rom; + } + fseek(fp, 0, SEEK_SET); + + snprintf(name, sizeof(name), "%s.rom", + object_get_typename(OBJECT(dev))); + memory_region_init_ram(&dev->dev.rom, OBJECT(dev), name, st.st_size, + &error_abort); + vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev); + ptr = memory_region_get_ram_ptr(&dev->dev.rom); + memset(ptr, 0xff, st.st_size); + + if (!fread(ptr, 1, st.st_size, fp)) { + error_report("pci-assign: Cannot read from host %s", rom_file); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); + goto close_rom; + } + + pci_register_bar(&dev->dev, PCI_ROM_SLOT, 0, &dev->dev.rom); + dev->dev.has_rom = true; +close_rom: + /* Write "0" to disable ROM */ + fseek(fp, 0, SEEK_SET); + val = 0; + if (!fwrite(&val, 1, 1, fp)) { + DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); + } + fclose(fp); +} diff --git a/qemu/hw/i386/kvmvapic.c b/qemu/hw/i386/kvmvapic.c new file mode 100644 index 000000000..c6d34b254 --- /dev/null +++ b/qemu/hw/i386/kvmvapic.c @@ -0,0 +1,860 @@ +/* + * TPR optimization for 32-bit Windows guests (XP and Server 2003) + * + * Copyright (C) 2007-2008 Qumranet Technologies + * Copyright (C) 2012 Jan Kiszka, Siemens AG + * + * This work is licensed under the terms of the GNU GPL version 2, or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "sysemu/sysemu.h" +#include "sysemu/cpus.h" +#include "sysemu/kvm.h" +#include "hw/i386/apic_internal.h" +#include "hw/sysbus.h" + +#define VAPIC_IO_PORT 0x7e + +#define VAPIC_CPU_SHIFT 7 + +#define ROM_BLOCK_SIZE 512 +#define ROM_BLOCK_MASK (~(ROM_BLOCK_SIZE - 1)) + +typedef enum VAPICMode { + VAPIC_INACTIVE = 0, + VAPIC_ACTIVE = 1, + VAPIC_STANDBY = 2, +} VAPICMode; + +typedef struct VAPICHandlers { + uint32_t set_tpr; + uint32_t set_tpr_eax; + uint32_t get_tpr[8]; + uint32_t get_tpr_stack; +} QEMU_PACKED VAPICHandlers; + +typedef struct GuestROMState { + char signature[8]; + uint32_t vaddr; + uint32_t fixup_start; + uint32_t fixup_end; + uint32_t vapic_vaddr; + uint32_t vapic_size; + uint32_t vcpu_shift; + uint32_t real_tpr_addr; + VAPICHandlers up; + VAPICHandlers mp; +} QEMU_PACKED GuestROMState; + +typedef struct VAPICROMState { + SysBusDevice busdev; + MemoryRegion io; + MemoryRegion rom; + uint32_t state; + uint32_t rom_state_paddr; + uint32_t rom_state_vaddr; + uint32_t vapic_paddr; + uint32_t real_tpr_addr; + GuestROMState rom_state; + size_t rom_size; + bool rom_mapped_writable; + VMChangeStateEntry *vmsentry; +} VAPICROMState; + +#define TYPE_VAPIC "kvmvapic" +#define VAPIC(obj) OBJECT_CHECK(VAPICROMState, (obj), TYPE_VAPIC) + +#define TPR_INSTR_ABS_MODRM 0x1 +#define TPR_INSTR_MATCH_MODRM_REG 0x2 + +typedef struct TPRInstruction { + uint8_t opcode; + uint8_t modrm_reg; + unsigned int flags; + TPRAccess access; + size_t length; + off_t addr_offset; +} TPRInstruction; + +/* must be sorted by length, shortest first */ +static const TPRInstruction tpr_instr[] = { + { /* mov abs to eax */ + .opcode = 0xa1, + .access = TPR_ACCESS_READ, + .length = 5, + .addr_offset = 1, + }, + { /* mov eax to abs */ + .opcode = 0xa3, + .access = TPR_ACCESS_WRITE, + .length = 5, + .addr_offset = 1, + }, + { /* mov r32 to r/m32 */ + .opcode = 0x89, + .flags = TPR_INSTR_ABS_MODRM, + .access = TPR_ACCESS_WRITE, + .length = 6, + .addr_offset = 2, + }, + { /* mov r/m32 to r32 */ + .opcode = 0x8b, + .flags = TPR_INSTR_ABS_MODRM, + .access = TPR_ACCESS_READ, + .length = 6, + .addr_offset = 2, + }, + { /* push r/m32 */ + .opcode = 0xff, + .modrm_reg = 6, + .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, + .access = TPR_ACCESS_READ, + .length = 6, + .addr_offset = 2, + }, + { /* mov imm32, r/m32 (c7/0) */ + .opcode = 0xc7, + .modrm_reg = 0, + .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG, + .access = TPR_ACCESS_WRITE, + .length = 10, + .addr_offset = 2, + }, +}; + +static void read_guest_rom_state(VAPICROMState *s) +{ + cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state, + sizeof(GuestROMState)); +} + +static void write_guest_rom_state(VAPICROMState *s) +{ + cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state, + sizeof(GuestROMState)); +} + +static void update_guest_rom_state(VAPICROMState *s) +{ + read_guest_rom_state(s); + + s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr); + s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT); + + write_guest_rom_state(s); +} + +static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env) +{ + CPUState *cs = CPU(x86_env_get_cpu(env)); + hwaddr paddr; + target_ulong addr; + + if (s->state == VAPIC_ACTIVE) { + return 0; + } + /* + * If there is no prior TPR access instruction we could analyze (which is + * the case after resume from hibernation), we need to scan the possible + * virtual address space for the APIC mapping. + */ + for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) { + paddr = cpu_get_phys_page_debug(cs, addr); + if (paddr != APIC_DEFAULT_ADDRESS) { + continue; + } + s->real_tpr_addr = addr + 0x80; + update_guest_rom_state(s); + return 0; + } + return -1; +} + +static uint8_t modrm_reg(uint8_t modrm) +{ + return (modrm >> 3) & 7; +} + +static bool is_abs_modrm(uint8_t modrm) +{ + return (modrm & 0xc7) == 0x05; +} + +static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr) +{ + return opcode[0] == instr->opcode && + (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) && + (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) || + modrm_reg(opcode[1]) == instr->modrm_reg); +} + +static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu, + target_ulong *pip, TPRAccess access) +{ + CPUState *cs = CPU(cpu); + const TPRInstruction *instr; + target_ulong ip = *pip; + uint8_t opcode[2]; + uint32_t real_tpr_addr; + int i; + + if ((ip & 0xf0000000ULL) != 0x80000000ULL && + (ip & 0xf0000000ULL) != 0xe0000000ULL) { + return -1; + } + + /* + * Early Windows 2003 SMP initialization contains a + * + * mov imm32, r/m32 + * + * instruction that is patched by TPR optimization. The problem is that + * RSP, used by the patched instruction, is zero, so the guest gets a + * double fault and dies. + */ + if (cpu->env.regs[R_ESP] == 0) { + return -1; + } + + if (kvm_enabled() && !kvm_irqchip_in_kernel()) { + /* + * KVM without kernel-based TPR access reporting will pass an IP that + * points after the accessing instruction. So we need to look backward + * to find the reason. + */ + for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { + instr = &tpr_instr[i]; + if (instr->access != access) { + continue; + } + if (cpu_memory_rw_debug(cs, ip - instr->length, opcode, + sizeof(opcode), 0) < 0) { + return -1; + } + if (opcode_matches(opcode, instr)) { + ip -= instr->length; + goto instruction_ok; + } + } + return -1; + } else { + if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) { + return -1; + } + for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) { + instr = &tpr_instr[i]; + if (opcode_matches(opcode, instr)) { + goto instruction_ok; + } + } + return -1; + } + +instruction_ok: + /* + * Grab the virtual TPR address from the instruction + * and update the cached values. + */ + if (cpu_memory_rw_debug(cs, ip + instr->addr_offset, + (void *)&real_tpr_addr, + sizeof(real_tpr_addr), 0) < 0) { + return -1; + } + real_tpr_addr = le32_to_cpu(real_tpr_addr); + if ((real_tpr_addr & 0xfff) != 0x80) { + return -1; + } + s->real_tpr_addr = real_tpr_addr; + update_guest_rom_state(s); + + *pip = ip; + return 0; +} + +static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip) +{ + CPUState *cs = CPU(x86_env_get_cpu(env)); + hwaddr paddr; + uint32_t rom_state_vaddr; + uint32_t pos, patch, offset; + + /* nothing to do if already activated */ + if (s->state == VAPIC_ACTIVE) { + return 0; + } + + /* bail out if ROM init code was not executed (missing ROM?) */ + if (s->state == VAPIC_INACTIVE) { + return -1; + } + + /* find out virtual address of the ROM */ + rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000); + paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr); + if (paddr == -1) { + return -1; + } + paddr += rom_state_vaddr & ~TARGET_PAGE_MASK; + if (paddr != s->rom_state_paddr) { + return -1; + } + read_guest_rom_state(s); + if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) { + return -1; + } + s->rom_state_vaddr = rom_state_vaddr; + + /* fixup addresses in ROM if needed */ + if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) { + return 0; + } + for (pos = le32_to_cpu(s->rom_state.fixup_start); + pos < le32_to_cpu(s->rom_state.fixup_end); + pos += 4) { + cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr, + &offset, sizeof(offset)); + offset = le32_to_cpu(offset); + cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch)); + patch = le32_to_cpu(patch); + patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr); + patch = cpu_to_le32(patch); + cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch)); + } + read_guest_rom_state(s); + s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) - + le32_to_cpu(s->rom_state.vaddr); + + return 0; +} + +/* + * Tries to read the unique processor number from the Kernel Processor Control + * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR + * cannot be accessed or is considered invalid. This also ensures that we are + * not patching the wrong guest. + */ +static int get_kpcr_number(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kpcr { + uint8_t fill1[0x1c]; + uint32_t self; + uint8_t fill2[0x31]; + uint8_t number; + } QEMU_PACKED kpcr; + + if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base, + (void *)&kpcr, sizeof(kpcr), 0) < 0 || + kpcr.self != env->segs[R_FS].base) { + return -1; + } + return kpcr.number; +} + +static int vapic_enable(VAPICROMState *s, X86CPU *cpu) +{ + int cpu_number = get_kpcr_number(cpu); + hwaddr vapic_paddr; + static const uint8_t enabled = 1; + + if (cpu_number < 0) { + return -1; + } + vapic_paddr = s->vapic_paddr + + (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT); + cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled), + &enabled, sizeof(enabled)); + apic_enable_vapic(cpu->apic_state, vapic_paddr); + + s->state = VAPIC_ACTIVE; + + return 0; +} + +static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte) +{ + cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1); +} + +static void patch_call(VAPICROMState *s, X86CPU *cpu, target_ulong ip, + uint32_t target) +{ + uint32_t offset; + + offset = cpu_to_le32(target - ip - 5); + patch_byte(cpu, ip, 0xe8); /* call near */ + cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1); +} + +static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + VAPICHandlers *handlers; + uint8_t opcode[2]; + uint32_t imm32; + target_ulong current_pc = 0; + target_ulong current_cs_base = 0; + int current_flags = 0; + + if (smp_cpus == 1) { + handlers = &s->rom_state.up; + } else { + handlers = &s->rom_state.mp; + } + + if (!kvm_enabled()) { + cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, + ¤t_flags); + } + + pause_all_vcpus(); + + cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0); + + switch (opcode[0]) { + case 0x89: /* mov r32 to r/m32 */ + patch_byte(cpu, ip, 0x50 + modrm_reg(opcode[1])); /* push reg */ + patch_call(s, cpu, ip + 1, handlers->set_tpr); + break; + case 0x8b: /* mov r/m32 to r32 */ + patch_byte(cpu, ip, 0x90); + patch_call(s, cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]); + break; + case 0xa1: /* mov abs to eax */ + patch_call(s, cpu, ip, handlers->get_tpr[0]); + break; + case 0xa3: /* mov eax to abs */ + patch_call(s, cpu, ip, handlers->set_tpr_eax); + break; + case 0xc7: /* mov imm32, r/m32 (c7/0) */ + patch_byte(cpu, ip, 0x68); /* push imm32 */ + cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0); + cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1); + patch_call(s, cpu, ip + 5, handlers->set_tpr); + break; + case 0xff: /* push r/m32 */ + patch_byte(cpu, ip, 0x50); /* push eax */ + patch_call(s, cpu, ip + 1, handlers->get_tpr_stack); + break; + default: + abort(); + } + + resume_all_vcpus(); + + if (!kvm_enabled()) { + cs->current_tb = NULL; + tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1); + cpu_resume_from_signal(cs, NULL); + } +} + +void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip, + TPRAccess access) +{ + VAPICROMState *s = VAPIC(dev); + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + cpu_synchronize_state(cs); + + if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) { + if (s->state == VAPIC_ACTIVE) { + vapic_enable(s, cpu); + } + return; + } + if (update_rom_mapping(s, env, ip) < 0) { + return; + } + if (vapic_enable(s, cpu) < 0) { + return; + } + patch_instruction(s, cpu, ip); +} + +typedef struct VAPICEnableTPRReporting { + DeviceState *apic; + bool enable; +} VAPICEnableTPRReporting; + +static void vapic_do_enable_tpr_reporting(void *data) +{ + VAPICEnableTPRReporting *info = data; + + apic_enable_tpr_access_reporting(info->apic, info->enable); +} + +static void vapic_enable_tpr_reporting(bool enable) +{ + VAPICEnableTPRReporting info = { + .enable = enable, + }; + CPUState *cs; + X86CPU *cpu; + + CPU_FOREACH(cs) { + cpu = X86_CPU(cs); + info.apic = cpu->apic_state; + run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info); + } +} + +static void vapic_reset(DeviceState *dev) +{ + VAPICROMState *s = VAPIC(dev); + + s->state = VAPIC_INACTIVE; + s->rom_state_paddr = 0; + vapic_enable_tpr_reporting(false); +} + +/* + * Set the IRQ polling hypercalls to the supported variant: + * - vmcall if using KVM in-kernel irqchip + * - 32-bit VAPIC port write otherwise + */ +static int patch_hypercalls(VAPICROMState *s) +{ + hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; + static const uint8_t vmcall_pattern[] = { /* vmcall */ + 0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1 + }; + static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */ + 0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e + }; + uint8_t alternates[2]; + const uint8_t *pattern; + const uint8_t *patch; + int patches = 0; + off_t pos; + uint8_t *rom; + + rom = g_malloc(s->rom_size); + cpu_physical_memory_read(rom_paddr, rom, s->rom_size); + + for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) { + if (kvm_irqchip_in_kernel()) { + pattern = outl_pattern; + alternates[0] = outl_pattern[7]; + alternates[1] = outl_pattern[7]; + patch = &vmcall_pattern[5]; + } else { + pattern = vmcall_pattern; + alternates[0] = vmcall_pattern[7]; + alternates[1] = 0xd9; /* AMD's VMMCALL */ + patch = &outl_pattern[5]; + } + if (memcmp(rom + pos, pattern, 7) == 0 && + (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) { + cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3); + /* + * Don't flush the tb here. Under ordinary conditions, the patched + * calls are miles away from the current IP. Under malicious + * conditions, the guest could trick us to crash. + */ + } + } + + g_free(rom); + + if (patches != 0 && patches != 2) { + return -1; + } + + return 0; +} + +/* + * For TCG mode or the time KVM honors read-only memory regions, we need to + * enable write access to the option ROM so that variables can be updated by + * the guest. + */ +static int vapic_map_rom_writable(VAPICROMState *s) +{ + hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK; + MemoryRegionSection section; + MemoryRegion *as; + size_t rom_size; + uint8_t *ram; + + as = sysbus_address_space(&s->busdev); + + if (s->rom_mapped_writable) { + memory_region_del_subregion(as, &s->rom); + object_unparent(OBJECT(&s->rom)); + } + + /* grab RAM memory region (region @rom_paddr may still be pc.rom) */ + section = memory_region_find(as, 0, 1); + + /* read ROM size from RAM region */ + if (rom_paddr + 2 >= memory_region_size(section.mr)) { + return -1; + } + ram = memory_region_get_ram_ptr(section.mr); + rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE; + if (rom_size == 0) { + return -1; + } + s->rom_size = rom_size; + + /* We need to round to avoid creating subpages + * from which we cannot run code. */ + rom_size += rom_paddr & ~TARGET_PAGE_MASK; + rom_paddr &= TARGET_PAGE_MASK; + rom_size = TARGET_PAGE_ALIGN(rom_size); + + memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr, + rom_paddr, rom_size); + memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000); + s->rom_mapped_writable = true; + memory_region_unref(section.mr); + + return 0; +} + +static int vapic_prepare(VAPICROMState *s) +{ + if (vapic_map_rom_writable(s) < 0) { + return -1; + } + + if (patch_hypercalls(s) < 0) { + return -1; + } + + vapic_enable_tpr_reporting(true); + + return 0; +} + +static void vapic_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + CPUState *cs = current_cpu; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + hwaddr rom_paddr; + VAPICROMState *s = opaque; + + cpu_synchronize_state(cs); + + /* + * The VAPIC supports two PIO-based hypercalls, both via port 0x7E. + * o 16-bit write access: + * Reports the option ROM initialization to the hypervisor. Written + * value is the offset of the state structure in the ROM. + * o 8-bit write access: + * Reactivates the VAPIC after a guest hibernation, i.e. after the + * option ROM content has been re-initialized by a guest power cycle. + * o 32-bit write access: + * Poll for pending IRQs, considering the current VAPIC state. + */ + switch (size) { + case 2: + if (s->state == VAPIC_INACTIVE) { + rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK; + s->rom_state_paddr = rom_paddr + data; + + s->state = VAPIC_STANDBY; + } + if (vapic_prepare(s) < 0) { + s->state = VAPIC_INACTIVE; + s->rom_state_paddr = 0; + break; + } + break; + case 1: + if (kvm_enabled()) { + /* + * Disable triggering instruction in ROM by writing a NOP. + * + * We cannot do this in TCG mode as the reported IP is not + * accurate. + */ + pause_all_vcpus(); + patch_byte(cpu, env->eip - 2, 0x66); + patch_byte(cpu, env->eip - 1, 0x90); + resume_all_vcpus(); + } + + if (s->state == VAPIC_ACTIVE) { + break; + } + if (update_rom_mapping(s, env, env->eip) < 0) { + break; + } + if (find_real_tpr_addr(s, env) < 0) { + break; + } + vapic_enable(s, cpu); + break; + default: + case 4: + if (!kvm_irqchip_in_kernel()) { + apic_poll_irq(cpu->apic_state); + } + break; + } +} + +static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0xffffffff; +} + +static const MemoryRegionOps vapic_ops = { + .write = vapic_write, + .read = vapic_read, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void vapic_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + VAPICROMState *s = VAPIC(dev); + + memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2); + sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io); + sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2); + + option_rom[nb_option_roms].name = "kvmvapic.bin"; + option_rom[nb_option_roms].bootindex = -1; + nb_option_roms++; +} + +static void do_vapic_enable(void *data) +{ + VAPICROMState *s = data; + X86CPU *cpu = X86_CPU(first_cpu); + + static const uint8_t enabled = 1; + cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled), + &enabled, sizeof(enabled)); + apic_enable_vapic(cpu->apic_state, s->vapic_paddr); + s->state = VAPIC_ACTIVE; +} + +static void kvmvapic_vm_state_change(void *opaque, int running, + RunState state) +{ + VAPICROMState *s = opaque; + uint8_t *zero; + + if (!running) { + return; + } + + if (s->state == VAPIC_ACTIVE) { + if (smp_cpus == 1) { + run_on_cpu(first_cpu, do_vapic_enable, s); + } else { + zero = g_malloc0(s->rom_state.vapic_size); + cpu_physical_memory_write(s->vapic_paddr, zero, + s->rom_state.vapic_size); + g_free(zero); + } + } + + qemu_del_vm_change_state_handler(s->vmsentry); +} + +static int vapic_post_load(void *opaque, int version_id) +{ + VAPICROMState *s = opaque; + + /* + * The old implementation of qemu-kvm did not provide the state + * VAPIC_STANDBY. Reconstruct it. + */ + if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) { + s->state = VAPIC_STANDBY; + } + + if (s->state != VAPIC_INACTIVE) { + if (vapic_prepare(s) < 0) { + return -1; + } + } + + if (!s->vmsentry) { + s->vmsentry = + qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s); + } + return 0; +} + +static const VMStateDescription vmstate_handlers = { + .name = "kvmvapic-handlers", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(set_tpr, VAPICHandlers), + VMSTATE_UINT32(set_tpr_eax, VAPICHandlers), + VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8), + VMSTATE_UINT32(get_tpr_stack, VAPICHandlers), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_guest_rom = { + .name = "kvmvapic-guest-rom", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UNUSED(8), /* signature */ + VMSTATE_UINT32(vaddr, GuestROMState), + VMSTATE_UINT32(fixup_start, GuestROMState), + VMSTATE_UINT32(fixup_end, GuestROMState), + VMSTATE_UINT32(vapic_vaddr, GuestROMState), + VMSTATE_UINT32(vapic_size, GuestROMState), + VMSTATE_UINT32(vcpu_shift, GuestROMState), + VMSTATE_UINT32(real_tpr_addr, GuestROMState), + VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers), + VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_vapic = { + .name = "kvm-tpr-opt", /* compatible with qemu-kvm VAPIC */ + .version_id = 1, + .minimum_version_id = 1, + .post_load = vapic_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom, + GuestROMState), + VMSTATE_UINT32(state, VAPICROMState), + VMSTATE_UINT32(real_tpr_addr, VAPICROMState), + VMSTATE_UINT32(rom_state_vaddr, VAPICROMState), + VMSTATE_UINT32(vapic_paddr, VAPICROMState), + VMSTATE_UINT32(rom_state_paddr, VAPICROMState), + VMSTATE_END_OF_LIST() + } +}; + +static void vapic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = vapic_reset; + dc->vmsd = &vmstate_vapic; + dc->realize = vapic_realize; +} + +static const TypeInfo vapic_type = { + .name = TYPE_VAPIC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VAPICROMState), + .class_init = vapic_class_init, +}; + +static void vapic_register(void) +{ + type_register_static(&vapic_type); +} + +type_init(vapic_register); diff --git a/qemu/hw/i386/multiboot.c b/qemu/hw/i386/multiboot.c new file mode 100644 index 000000000..1adbe9e25 --- /dev/null +++ b/qemu/hw/i386/multiboot.c @@ -0,0 +1,371 @@ +/* + * QEMU PC System Emulator + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/hw.h" +#include "hw/nvram/fw_cfg.h" +#include "multiboot.h" +#include "hw/loader.h" +#include "elf.h" +#include "sysemu/sysemu.h" + +/* Show multiboot debug output */ +//#define DEBUG_MULTIBOOT + +#ifdef DEBUG_MULTIBOOT +#define mb_debug(a...) fprintf(stderr, ## a) +#else +#define mb_debug(a...) +#endif + +#define MULTIBOOT_STRUCT_ADDR 0x9000 + +#if MULTIBOOT_STRUCT_ADDR > 0xf0000 +#error multiboot struct needs to fit in 16 bit real mode +#endif + +enum { + /* Multiboot info */ + MBI_FLAGS = 0, + MBI_MEM_LOWER = 4, + MBI_MEM_UPPER = 8, + MBI_BOOT_DEVICE = 12, + MBI_CMDLINE = 16, + MBI_MODS_COUNT = 20, + MBI_MODS_ADDR = 24, + MBI_MMAP_ADDR = 48, + MBI_BOOTLOADER = 64, + + MBI_SIZE = 88, + + /* Multiboot modules */ + MB_MOD_START = 0, + MB_MOD_END = 4, + MB_MOD_CMDLINE = 8, + + MB_MOD_SIZE = 16, + + /* Region offsets */ + ADDR_E820_MAP = MULTIBOOT_STRUCT_ADDR + 0, + ADDR_MBI = ADDR_E820_MAP + 0x500, + + /* Multiboot flags */ + MULTIBOOT_FLAGS_MEMORY = 1 << 0, + MULTIBOOT_FLAGS_BOOT_DEVICE = 1 << 1, + MULTIBOOT_FLAGS_CMDLINE = 1 << 2, + MULTIBOOT_FLAGS_MODULES = 1 << 3, + MULTIBOOT_FLAGS_MMAP = 1 << 6, + MULTIBOOT_FLAGS_BOOTLOADER = 1 << 9, +}; + +typedef struct { + /* buffer holding kernel, cmdlines and mb_infos */ + void *mb_buf; + /* address in target */ + hwaddr mb_buf_phys; + /* size of mb_buf in bytes */ + unsigned mb_buf_size; + /* offset of mb-info's in bytes */ + hwaddr offset_mbinfo; + /* offset in buffer for cmdlines in bytes */ + hwaddr offset_cmdlines; + /* offset in buffer for bootloader name in bytes */ + hwaddr offset_bootloader; + /* offset of modules in bytes */ + hwaddr offset_mods; + /* available slots for mb modules infos */ + int mb_mods_avail; + /* currently used slots of mb modules */ + int mb_mods_count; +} MultibootState; + +const char *bootloader_name = "qemu"; + +static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline) +{ + hwaddr p = s->offset_cmdlines; + char *b = (char *)s->mb_buf + p; + + get_opt_value(b, strlen(cmdline) + 1, cmdline); + s->offset_cmdlines += strlen(b) + 1; + return s->mb_buf_phys + p; +} + +static uint32_t mb_add_bootloader(MultibootState *s, const char *bootloader) +{ + hwaddr p = s->offset_bootloader; + char *b = (char *)s->mb_buf + p; + + memcpy(b, bootloader, strlen(bootloader) + 1); + s->offset_bootloader += strlen(b) + 1; + return s->mb_buf_phys + p; +} + +static void mb_add_mod(MultibootState *s, + hwaddr start, hwaddr end, + hwaddr cmdline_phys) +{ + char *p; + assert(s->mb_mods_count < s->mb_mods_avail); + + p = (char *)s->mb_buf + s->offset_mbinfo + MB_MOD_SIZE * s->mb_mods_count; + + stl_p(p + MB_MOD_START, start); + stl_p(p + MB_MOD_END, end); + stl_p(p + MB_MOD_CMDLINE, cmdline_phys); + + mb_debug("mod%02d: "TARGET_FMT_plx" - "TARGET_FMT_plx"\n", + s->mb_mods_count, start, end); + + s->mb_mods_count++; +} + +int load_multiboot(FWCfgState *fw_cfg, + FILE *f, + const char *kernel_filename, + const char *initrd_filename, + const char *kernel_cmdline, + int kernel_file_size, + uint8_t *header) +{ + int i, is_multiboot = 0; + uint32_t flags = 0; + uint32_t mh_entry_addr; + uint32_t mh_load_addr; + uint32_t mb_kernel_size; + MultibootState mbs; + uint8_t bootinfo[MBI_SIZE]; + uint8_t *mb_bootinfo_data; + uint32_t cmdline_len; + + /* Ok, let's see if it is a multiboot image. + The header is 12x32bit long, so the latest entry may be 8192 - 48. */ + for (i = 0; i < (8192 - 48); i += 4) { + if (ldl_p(header+i) == 0x1BADB002) { + uint32_t checksum = ldl_p(header+i+8); + flags = ldl_p(header+i+4); + checksum += flags; + checksum += (uint32_t)0x1BADB002; + if (!checksum) { + is_multiboot = 1; + break; + } + } + } + + if (!is_multiboot) + return 0; /* no multiboot */ + + mb_debug("qemu: I believe we found a multiboot image!\n"); + memset(bootinfo, 0, sizeof(bootinfo)); + memset(&mbs, 0, sizeof(mbs)); + + if (flags & 0x00000004) { /* MULTIBOOT_HEADER_HAS_VBE */ + fprintf(stderr, "qemu: multiboot knows VBE. we don't.\n"); + } + if (!(flags & 0x00010000)) { /* MULTIBOOT_HEADER_HAS_ADDR */ + uint64_t elf_entry; + uint64_t elf_low, elf_high; + int kernel_size; + fclose(f); + + if (((struct elf64_hdr*)header)->e_machine == EM_X86_64) { + fprintf(stderr, "Cannot load x86-64 image, give a 32bit one.\n"); + exit(1); + } + + kernel_size = load_elf(kernel_filename, NULL, NULL, &elf_entry, + &elf_low, &elf_high, 0, ELF_MACHINE, 0); + if (kernel_size < 0) { + fprintf(stderr, "Error while loading elf kernel\n"); + exit(1); + } + mh_load_addr = elf_low; + mb_kernel_size = elf_high - elf_low; + mh_entry_addr = elf_entry; + + mbs.mb_buf = g_malloc(mb_kernel_size); + if (rom_copy(mbs.mb_buf, mh_load_addr, mb_kernel_size) != mb_kernel_size) { + fprintf(stderr, "Error while fetching elf kernel from rom\n"); + exit(1); + } + + mb_debug("qemu: loading multiboot-elf kernel (%#x bytes) with entry %#zx\n", + mb_kernel_size, (size_t)mh_entry_addr); + } else { + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_ADDR. */ + uint32_t mh_header_addr = ldl_p(header+i+12); + uint32_t mh_load_end_addr = ldl_p(header+i+20); + uint32_t mh_bss_end_addr = ldl_p(header+i+24); + mh_load_addr = ldl_p(header+i+16); + uint32_t mb_kernel_text_offset = i - (mh_header_addr - mh_load_addr); + uint32_t mb_load_size = 0; + mh_entry_addr = ldl_p(header+i+28); + + if (mh_load_end_addr) { + mb_kernel_size = mh_bss_end_addr - mh_load_addr; + mb_load_size = mh_load_end_addr - mh_load_addr; + } else { + mb_kernel_size = kernel_file_size - mb_kernel_text_offset; + mb_load_size = mb_kernel_size; + } + + /* Valid if mh_flags sets MULTIBOOT_HEADER_HAS_VBE. + uint32_t mh_mode_type = ldl_p(header+i+32); + uint32_t mh_width = ldl_p(header+i+36); + uint32_t mh_height = ldl_p(header+i+40); + uint32_t mh_depth = ldl_p(header+i+44); */ + + mb_debug("multiboot: mh_header_addr = %#x\n", mh_header_addr); + mb_debug("multiboot: mh_load_addr = %#x\n", mh_load_addr); + mb_debug("multiboot: mh_load_end_addr = %#x\n", mh_load_end_addr); + mb_debug("multiboot: mh_bss_end_addr = %#x\n", mh_bss_end_addr); + mb_debug("qemu: loading multiboot kernel (%#x bytes) at %#x\n", + mb_load_size, mh_load_addr); + + mbs.mb_buf = g_malloc(mb_kernel_size); + fseek(f, mb_kernel_text_offset, SEEK_SET); + if (fread(mbs.mb_buf, 1, mb_load_size, f) != mb_load_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + memset(mbs.mb_buf + mb_load_size, 0, mb_kernel_size - mb_load_size); + fclose(f); + } + + mbs.mb_buf_phys = mh_load_addr; + + mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_kernel_size); + mbs.offset_mbinfo = mbs.mb_buf_size; + + /* Calculate space for cmdlines, bootloader name, and mb_mods */ + cmdline_len = strlen(kernel_filename) + 1; + cmdline_len += strlen(kernel_cmdline) + 1; + if (initrd_filename) { + const char *r = initrd_filename; + cmdline_len += strlen(r) + 1; + mbs.mb_mods_avail = 1; + while (*(r = get_opt_value(NULL, 0, r))) { + mbs.mb_mods_avail++; + r++; + } + } + + mbs.mb_buf_size += cmdline_len; + mbs.mb_buf_size += MB_MOD_SIZE * mbs.mb_mods_avail; + mbs.mb_buf_size += strlen(bootloader_name) + 1; + + mbs.mb_buf_size = TARGET_PAGE_ALIGN(mbs.mb_buf_size); + + /* enlarge mb_buf to hold cmdlines, bootloader, mb-info structs */ + mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); + mbs.offset_cmdlines = mbs.offset_mbinfo + mbs.mb_mods_avail * MB_MOD_SIZE; + mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len; + + if (initrd_filename) { + char *next_initrd, not_last; + + mbs.offset_mods = mbs.mb_buf_size; + + do { + char *next_space; + int mb_mod_length; + uint32_t offs = mbs.mb_buf_size; + + next_initrd = (char *)get_opt_value(NULL, 0, initrd_filename); + not_last = *next_initrd; + *next_initrd = '\0'; + /* if a space comes after the module filename, treat everything + after that as parameters */ + hwaddr c = mb_add_cmdline(&mbs, initrd_filename); + if ((next_space = strchr(initrd_filename, ' '))) + *next_space = '\0'; + mb_debug("multiboot loading module: %s\n", initrd_filename); + mb_mod_length = get_image_size(initrd_filename); + if (mb_mod_length < 0) { + fprintf(stderr, "Failed to open file '%s'\n", initrd_filename); + exit(1); + } + + mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size); + mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); + + load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs); + mb_add_mod(&mbs, mbs.mb_buf_phys + offs, + mbs.mb_buf_phys + offs + mb_mod_length, c); + + mb_debug("mod_start: %p\nmod_end: %p\n cmdline: "TARGET_FMT_plx"\n", + (char *)mbs.mb_buf + offs, + (char *)mbs.mb_buf + offs + mb_mod_length, c); + initrd_filename = next_initrd+1; + } while (not_last); + } + + /* Commandline support */ + char kcmdline[strlen(kernel_filename) + strlen(kernel_cmdline) + 2]; + snprintf(kcmdline, sizeof(kcmdline), "%s %s", + kernel_filename, kernel_cmdline); + stl_p(bootinfo + MBI_CMDLINE, mb_add_cmdline(&mbs, kcmdline)); + + stl_p(bootinfo + MBI_BOOTLOADER, mb_add_bootloader(&mbs, bootloader_name)); + + stl_p(bootinfo + MBI_MODS_ADDR, mbs.mb_buf_phys + mbs.offset_mbinfo); + stl_p(bootinfo + MBI_MODS_COUNT, mbs.mb_mods_count); /* mods_count */ + + /* the kernel is where we want it to be now */ + stl_p(bootinfo + MBI_FLAGS, MULTIBOOT_FLAGS_MEMORY + | MULTIBOOT_FLAGS_BOOT_DEVICE + | MULTIBOOT_FLAGS_CMDLINE + | MULTIBOOT_FLAGS_MODULES + | MULTIBOOT_FLAGS_MMAP + | MULTIBOOT_FLAGS_BOOTLOADER); + stl_p(bootinfo + MBI_BOOT_DEVICE, 0x8000ffff); /* XXX: use the -boot switch? */ + stl_p(bootinfo + MBI_MMAP_ADDR, ADDR_E820_MAP); + + mb_debug("multiboot: mh_entry_addr = %#x\n", mh_entry_addr); + mb_debug(" mb_buf_phys = "TARGET_FMT_plx"\n", mbs.mb_buf_phys); + mb_debug(" mod_start = "TARGET_FMT_plx"\n", mbs.mb_buf_phys + mbs.offset_mods); + mb_debug(" mb_mods_count = %d\n", mbs.mb_mods_count); + + /* save bootinfo off the stack */ + mb_bootinfo_data = g_malloc(sizeof(bootinfo)); + memcpy(mb_bootinfo_data, bootinfo, sizeof(bootinfo)); + + /* Pass variables to option rom */ + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, mh_entry_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, mbs.mb_buf_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, + mbs.mb_buf, mbs.mb_buf_size); + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, ADDR_MBI); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, sizeof(bootinfo)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, mb_bootinfo_data, + sizeof(bootinfo)); + + option_rom[nb_option_roms].name = "multiboot.bin"; + option_rom[nb_option_roms].bootindex = 0; + nb_option_roms++; + + return 1; /* yes, we are multiboot */ +} diff --git a/qemu/hw/i386/multiboot.h b/qemu/hw/i386/multiboot.h new file mode 100644 index 000000000..60de309cd --- /dev/null +++ b/qemu/hw/i386/multiboot.h @@ -0,0 +1,14 @@ +#ifndef QEMU_MULTIBOOT_H +#define QEMU_MULTIBOOT_H + +#include "hw/nvram/fw_cfg.h" + +int load_multiboot(FWCfgState *fw_cfg, + FILE *f, + const char *kernel_filename, + const char *initrd_filename, + const char *kernel_cmdline, + int kernel_file_size, + uint8_t *header); + +#endif diff --git a/qemu/hw/i386/pc.c b/qemu/hw/i386/pc.c new file mode 100644 index 000000000..7661ea9cd --- /dev/null +++ b/qemu/hw/i386/pc.c @@ -0,0 +1,1965 @@ +/* + * QEMU PC System Emulator + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "hw/char/serial.h" +#include "hw/i386/apic.h" +#include "hw/i386/topology.h" +#include "sysemu/cpus.h" +#include "hw/block/fdc.h" +#include "hw/ide.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/timer/hpet.h" +#include "hw/i386/smbios.h" +#include "hw/loader.h" +#include "elf.h" +#include "multiboot.h" +#include "hw/timer/mc146818rtc.h" +#include "hw/timer/i8254.h" +#include "hw/audio/pcspk.h" +#include "hw/pci/msi.h" +#include "hw/sysbus.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "sysemu/kvm.h" +#include "sysemu/qtest.h" +#include "kvm_i386.h" +#include "hw/xen/xen.h" +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "ui/qemu-spice.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "sysemu/arch_init.h" +#include "qemu/bitmap.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/cpu_hotplug.h" +#include "hw/cpu/icc_bus.h" +#include "hw/boards.h" +#include "hw/pci/pci_host.h" +#include "acpi-build.h" +#include "hw/mem/pc-dimm.h" +#include "qapi/visitor.h" +#include "qapi-visit.h" + +/* debug PC/ISA interrupts */ +//#define DEBUG_IRQ + +#ifdef DEBUG_IRQ +#define DPRINTF(fmt, ...) \ + do { printf("CPUIRQ: " fmt , ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) +#endif + +/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables + * (128K) and other BIOS datastructures (less than 4K reported to be used at + * the moment, 32K should be enough for a while). */ +static unsigned acpi_data_size = 0x20000 + 0x8000; +void pc_set_legacy_acpi_data_size(void) +{ + acpi_data_size = 0x10000; +} + +#define BIOS_CFG_IOPORT 0x510 +#define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) +#define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) +#define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) +#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) +#define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) + +#define E820_NR_ENTRIES 16 + +struct e820_entry { + uint64_t address; + uint64_t length; + uint32_t type; +} QEMU_PACKED __attribute((__aligned__(4))); + +struct e820_table { + uint32_t count; + struct e820_entry entry[E820_NR_ENTRIES]; +} QEMU_PACKED __attribute((__aligned__(4))); + +static struct e820_table e820_reserve; +static struct e820_entry *e820_table; +static unsigned e820_entries; +struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; + +void gsi_handler(void *opaque, int n, int level) +{ + GSIState *s = opaque; + + DPRINTF("pc: %s GSI %d\n", level ? "raising" : "lowering", n); + if (n < ISA_NUM_IRQS) { + qemu_set_irq(s->i8259_irq[n], level); + } + qemu_set_irq(s->ioapic_irq[n], level); +} + +static void ioport80_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ +} + +static uint64_t ioport80_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0xffffffffffffffffULL; +} + +/* MSDOS compatibility mode FPU exception support */ +static qemu_irq ferr_irq; + +void pc_register_ferr_irq(qemu_irq irq) +{ + ferr_irq = irq; +} + +/* XXX: add IGNNE support */ +void cpu_set_ferr(CPUX86State *s) +{ + qemu_irq_raise(ferr_irq); +} + +static void ioportF0_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + qemu_irq_lower(ferr_irq); +} + +static uint64_t ioportF0_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0xffffffffffffffffULL; +} + +/* TSC handling */ +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpu_get_ticks(); +} + +/* IRQ handling */ +int cpu_get_pic_interrupt(CPUX86State *env) +{ + X86CPU *cpu = x86_env_get_cpu(env); + int intno; + + intno = apic_get_interrupt(cpu->apic_state); + if (intno >= 0) { + return intno; + } + /* read the irq from the PIC */ + if (!apic_accept_pic_intr(cpu->apic_state)) { + return -1; + } + + intno = pic_read_irq(isa_pic); + return intno; +} + +static void pic_irq_request(void *opaque, int irq, int level) +{ + CPUState *cs = first_cpu; + X86CPU *cpu = X86_CPU(cs); + + DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq); + if (cpu->apic_state) { + CPU_FOREACH(cs) { + cpu = X86_CPU(cs); + if (apic_accept_pic_intr(cpu->apic_state)) { + apic_deliver_pic_intr(cpu->apic_state, level); + } + } + } else { + if (level) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } +} + +/* PC cmos mappings */ + +#define REG_EQUIPMENT_BYTE 0x14 + +static int cmos_get_fd_drive_type(FDriveType fd0) +{ + int val; + + switch (fd0) { + case FDRIVE_DRV_144: + /* 1.44 Mb 3"5 drive */ + val = 4; + break; + case FDRIVE_DRV_288: + /* 2.88 Mb 3"5 drive */ + val = 5; + break; + case FDRIVE_DRV_120: + /* 1.2 Mb 5"5 drive */ + val = 2; + break; + case FDRIVE_DRV_NONE: + default: + val = 0; + break; + } + return val; +} + +static void cmos_init_hd(ISADevice *s, int type_ofs, int info_ofs, + int16_t cylinders, int8_t heads, int8_t sectors) +{ + rtc_set_memory(s, type_ofs, 47); + rtc_set_memory(s, info_ofs, cylinders); + rtc_set_memory(s, info_ofs + 1, cylinders >> 8); + rtc_set_memory(s, info_ofs + 2, heads); + rtc_set_memory(s, info_ofs + 3, 0xff); + rtc_set_memory(s, info_ofs + 4, 0xff); + rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3)); + rtc_set_memory(s, info_ofs + 6, cylinders); + rtc_set_memory(s, info_ofs + 7, cylinders >> 8); + rtc_set_memory(s, info_ofs + 8, sectors); +} + +/* convert boot_device letter to something recognizable by the bios */ +static int boot_device2nibble(char boot_device) +{ + switch(boot_device) { + case 'a': + case 'b': + return 0x01; /* floppy boot */ + case 'c': + return 0x02; /* hard drive boot */ + case 'd': + return 0x03; /* CD-ROM boot */ + case 'n': + return 0x04; /* Network boot */ + } + return 0; +} + +static void set_boot_dev(ISADevice *s, const char *boot_device, Error **errp) +{ +#define PC_MAX_BOOT_DEVICES 3 + int nbds, bds[3] = { 0, }; + int i; + + nbds = strlen(boot_device); + if (nbds > PC_MAX_BOOT_DEVICES) { + error_setg(errp, "Too many boot devices for PC"); + return; + } + for (i = 0; i < nbds; i++) { + bds[i] = boot_device2nibble(boot_device[i]); + if (bds[i] == 0) { + error_setg(errp, "Invalid boot device for PC: '%c'", + boot_device[i]); + return; + } + } + rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]); + rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1)); +} + +static void pc_boot_set(void *opaque, const char *boot_device, Error **errp) +{ + set_boot_dev(opaque, boot_device, errp); +} + +static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy) +{ + int val, nb, i; + FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE }; + + /* floppy type */ + if (floppy) { + for (i = 0; i < 2; i++) { + fd_type[i] = isa_fdc_get_drive_type(floppy, i); + } + } + val = (cmos_get_fd_drive_type(fd_type[0]) << 4) | + cmos_get_fd_drive_type(fd_type[1]); + rtc_set_memory(rtc_state, 0x10, val); + + val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE); + nb = 0; + if (fd_type[0] < FDRIVE_DRV_NONE) { + nb++; + } + if (fd_type[1] < FDRIVE_DRV_NONE) { + nb++; + } + switch (nb) { + case 0: + break; + case 1: + val |= 0x01; /* 1 drive, ready for boot */ + break; + case 2: + val |= 0x41; /* 2 drives, ready for boot */ + break; + } + rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val); +} + +typedef struct pc_cmos_init_late_arg { + ISADevice *rtc_state; + BusState *idebus[2]; +} pc_cmos_init_late_arg; + +typedef struct check_fdc_state { + ISADevice *floppy; + bool multiple; +} CheckFdcState; + +static int check_fdc(Object *obj, void *opaque) +{ + CheckFdcState *state = opaque; + Object *fdc; + uint32_t iobase; + Error *local_err = NULL; + + fdc = object_dynamic_cast(obj, TYPE_ISA_FDC); + if (!fdc) { + return 0; + } + + iobase = object_property_get_int(obj, "iobase", &local_err); + if (local_err || iobase != 0x3f0) { + error_free(local_err); + return 0; + } + + if (state->floppy) { + state->multiple = true; + } else { + state->floppy = ISA_DEVICE(obj); + } + return 0; +} + +static const char * const fdc_container_path[] = { + "/unattached", "/peripheral", "/peripheral-anon" +}; + +static void pc_cmos_init_late(void *opaque) +{ + pc_cmos_init_late_arg *arg = opaque; + ISADevice *s = arg->rtc_state; + int16_t cylinders; + int8_t heads, sectors; + int val; + int i, trans; + Object *container; + CheckFdcState state = { 0 }; + + val = 0; + if (ide_get_geometry(arg->idebus[0], 0, + &cylinders, &heads, §ors) >= 0) { + cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors); + val |= 0xf0; + } + if (ide_get_geometry(arg->idebus[0], 1, + &cylinders, &heads, §ors) >= 0) { + cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors); + val |= 0x0f; + } + rtc_set_memory(s, 0x12, val); + + val = 0; + for (i = 0; i < 4; i++) { + /* NOTE: ide_get_geometry() returns the physical + geometry. It is always such that: 1 <= sects <= 63, 1 + <= heads <= 16, 1 <= cylinders <= 16383. The BIOS + geometry can be different if a translation is done. */ + if (ide_get_geometry(arg->idebus[i / 2], i % 2, + &cylinders, &heads, §ors) >= 0) { + trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1; + assert((trans & ~3) == 0); + val |= trans << (i * 2); + } + } + rtc_set_memory(s, 0x39, val); + + /* + * Locate the FDC at IO address 0x3f0, and configure the CMOS registers + * accordingly. + */ + for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) { + container = container_get(qdev_get_machine(), fdc_container_path[i]); + object_child_foreach(container, check_fdc, &state); + } + + if (state.multiple) { + error_report("warning: multiple floppy disk controllers with " + "iobase=0x3f0 have been found;\n" + "the one being picked for CMOS setup might not reflect " + "your intent"); + } + pc_cmos_init_floppy(s, state.floppy); + + qemu_unregister_reset(pc_cmos_init_late, opaque); +} + +void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, + const char *boot_device, MachineState *machine, + BusState *idebus0, BusState *idebus1, + ISADevice *s) +{ + int val; + static pc_cmos_init_late_arg arg; + PCMachineState *pc_machine = PC_MACHINE(machine); + Error *local_err = NULL; + + /* various important CMOS locations needed by PC/Bochs bios */ + + /* memory size */ + /* base memory (first MiB) */ + val = MIN(ram_size / 1024, 640); + rtc_set_memory(s, 0x15, val); + rtc_set_memory(s, 0x16, val >> 8); + /* extended memory (next 64MiB) */ + if (ram_size > 1024 * 1024) { + val = (ram_size - 1024 * 1024) / 1024; + } else { + val = 0; + } + if (val > 65535) + val = 65535; + rtc_set_memory(s, 0x17, val); + rtc_set_memory(s, 0x18, val >> 8); + rtc_set_memory(s, 0x30, val); + rtc_set_memory(s, 0x31, val >> 8); + /* memory between 16MiB and 4GiB */ + if (ram_size > 16 * 1024 * 1024) { + val = (ram_size - 16 * 1024 * 1024) / 65536; + } else { + val = 0; + } + if (val > 65535) + val = 65535; + rtc_set_memory(s, 0x34, val); + rtc_set_memory(s, 0x35, val >> 8); + /* memory above 4GiB */ + val = above_4g_mem_size / 65536; + rtc_set_memory(s, 0x5b, val); + rtc_set_memory(s, 0x5c, val >> 8); + rtc_set_memory(s, 0x5d, val >> 16); + + /* set the number of CPU */ + rtc_set_memory(s, 0x5f, smp_cpus - 1); + + object_property_add_link(OBJECT(machine), "rtc_state", + TYPE_ISA_DEVICE, + (Object **)&pc_machine->rtc, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(s), + "rtc_state", &error_abort); + + set_boot_dev(s, boot_device, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + + val = 0; + val |= 0x02; /* FPU is there */ + val |= 0x04; /* PS/2 mouse installed */ + rtc_set_memory(s, REG_EQUIPMENT_BYTE, val); + + /* hard drives and FDC */ + arg.rtc_state = s; + arg.idebus[0] = idebus0; + arg.idebus[1] = idebus1; + qemu_register_reset(pc_cmos_init_late, &arg); +} + +#define TYPE_PORT92 "port92" +#define PORT92(obj) OBJECT_CHECK(Port92State, (obj), TYPE_PORT92) + +/* port 92 stuff: could be split off */ +typedef struct Port92State { + ISADevice parent_obj; + + MemoryRegion io; + uint8_t outport; + qemu_irq *a20_out; +} Port92State; + +static void port92_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + Port92State *s = opaque; + int oldval = s->outport; + + DPRINTF("port92: write 0x%02" PRIx64 "\n", val); + s->outport = val; + qemu_set_irq(*s->a20_out, (val >> 1) & 1); + if ((val & 1) && !(oldval & 1)) { + qemu_system_reset_request(); + } +} + +static uint64_t port92_read(void *opaque, hwaddr addr, + unsigned size) +{ + Port92State *s = opaque; + uint32_t ret; + + ret = s->outport; + DPRINTF("port92: read 0x%02x\n", ret); + return ret; +} + +static void port92_init(ISADevice *dev, qemu_irq *a20_out) +{ + Port92State *s = PORT92(dev); + + s->a20_out = a20_out; +} + +static const VMStateDescription vmstate_port92_isa = { + .name = "port92", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(outport, Port92State), + VMSTATE_END_OF_LIST() + } +}; + +static void port92_reset(DeviceState *d) +{ + Port92State *s = PORT92(d); + + s->outport &= ~1; +} + +static const MemoryRegionOps port92_ops = { + .read = port92_read, + .write = port92_write, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void port92_initfn(Object *obj) +{ + Port92State *s = PORT92(obj); + + memory_region_init_io(&s->io, OBJECT(s), &port92_ops, s, "port92", 1); + + s->outport = 0; +} + +static void port92_realizefn(DeviceState *dev, Error **errp) +{ + ISADevice *isadev = ISA_DEVICE(dev); + Port92State *s = PORT92(dev); + + isa_register_ioport(isadev, &s->io, 0x92); +} + +static void port92_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = port92_realizefn; + dc->reset = port92_reset; + dc->vmsd = &vmstate_port92_isa; + /* + * Reason: unlike ordinary ISA devices, this one needs additional + * wiring: its A20 output line needs to be wired up by + * port92_init(). + */ + dc->cannot_instantiate_with_device_add_yet = true; +} + +static const TypeInfo port92_info = { + .name = TYPE_PORT92, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(Port92State), + .instance_init = port92_initfn, + .class_init = port92_class_initfn, +}; + +static void port92_register_types(void) +{ + type_register_static(&port92_info); +} + +type_init(port92_register_types) + +static void handle_a20_line_change(void *opaque, int irq, int level) +{ + X86CPU *cpu = opaque; + + /* XXX: send to all CPUs ? */ + /* XXX: add logic to handle multiple A20 line sources */ + x86_cpu_set_a20(cpu, level); +} + +int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) +{ + int index = le32_to_cpu(e820_reserve.count); + struct e820_entry *entry; + + if (type != E820_RAM) { + /* old FW_CFG_E820_TABLE entry -- reservations only */ + if (index >= E820_NR_ENTRIES) { + return -EBUSY; + } + entry = &e820_reserve.entry[index++]; + + entry->address = cpu_to_le64(address); + entry->length = cpu_to_le64(length); + entry->type = cpu_to_le32(type); + + e820_reserve.count = cpu_to_le32(index); + } + + /* new "etc/e820" file -- include ram too */ + e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); + e820_table[e820_entries].address = cpu_to_le64(address); + e820_table[e820_entries].length = cpu_to_le64(length); + e820_table[e820_entries].type = cpu_to_le32(type); + e820_entries++; + + return e820_entries; +} + +int e820_get_num_entries(void) +{ + return e820_entries; +} + +bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) +{ + if (idx < e820_entries && e820_table[idx].type == cpu_to_le32(type)) { + *address = le64_to_cpu(e820_table[idx].address); + *length = le64_to_cpu(e820_table[idx].length); + return true; + } + return false; +} + +/* Enables contiguous-apic-ID mode, for compatibility */ +static bool compat_apic_id_mode; + +void enable_compat_apic_id_mode(void) +{ + compat_apic_id_mode = true; +} + +/* Calculates initial APIC ID for a specific CPU index + * + * Currently we need to be able to calculate the APIC ID from the CPU index + * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have + * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of + * all CPUs up to max_cpus. + */ +static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +{ + uint32_t correct_id; + static bool warned; + + correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); + if (compat_apic_id_mode) { + if (cpu_index != correct_id && !warned && !qtest_enabled()) { + error_report("APIC IDs set in compatibility mode, " + "CPU topology won't match the configuration"); + warned = true; + } + return cpu_index; + } else { + return correct_id; + } +} + +/* Calculates the limit to CPU APIC ID values + * + * This function returns the limit for the APIC ID value, so that all + * CPU APIC IDs are < pc_apic_id_limit(). + * + * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). + */ +static unsigned int pc_apic_id_limit(unsigned int max_cpus) +{ + return x86_cpu_apic_id_from_index(max_cpus - 1) + 1; +} + +static FWCfgState *bochs_bios_init(void) +{ + FWCfgState *fw_cfg; + uint8_t *smbios_tables, *smbios_anchor; + size_t smbios_tables_len, smbios_anchor_len; + uint64_t *numa_fw_cfg; + int i, j; + unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); + + fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT); + /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: + * + * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug + * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC + * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the + * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS + * may see". + * + * So, this means we must not use max_cpus, here, but the maximum possible + * APIC ID value, plus one. + * + * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is + * the APIC ID, not the "CPU index" + */ + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES, + acpi_tables, acpi_tables_len); + fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, kvm_allows_irq0_override()); + + smbios_tables = smbios_get_table_legacy(&smbios_tables_len); + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + } + + smbios_get_tables(&smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); + if (smbios_anchor) { + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-tables", + smbios_tables, smbios_tables_len); + fw_cfg_add_file(fw_cfg, "etc/smbios/smbios-anchor", + smbios_anchor, smbios_anchor_len); + } + + fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, + &e820_reserve, sizeof(e820_reserve)); + fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, + sizeof(struct e820_entry) * e820_entries); + + fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); + /* allocate memory for the NUMA channel: one (64bit) word for the number + * of nodes, one word for each VCPU->node and one word for each node to + * hold the amount of memory. + */ + numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); + numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); + for (i = 0; i < max_cpus; i++) { + unsigned int apic_id = x86_cpu_apic_id_from_index(i); + assert(apic_id < apic_id_limit); + for (j = 0; j < nb_numa_nodes; j++) { + if (test_bit(i, numa_info[j].node_cpu)) { + numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); + break; + } + } + } + for (i = 0; i < nb_numa_nodes; i++) { + numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(numa_info[i].node_mem); + } + fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, + (1 + apic_id_limit + nb_numa_nodes) * + sizeof(*numa_fw_cfg)); + + return fw_cfg; +} + +static long get_file_size(FILE *f) +{ + long where, size; + + /* XXX: on Unix systems, using fstat() probably makes more sense */ + + where = ftell(f); + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, where, SEEK_SET); + + return size; +} + +static void load_linux(FWCfgState *fw_cfg, + const char *kernel_filename, + const char *initrd_filename, + const char *kernel_cmdline, + hwaddr max_ram_size) +{ + uint16_t protocol; + int setup_size, kernel_size, initrd_size = 0, cmdline_size; + uint32_t initrd_max; + uint8_t header[8192], *setup, *kernel, *initrd_data; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; + FILE *f; + char *vmode; + + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline)+16) & ~15; + + /* load the kernel header */ + f = fopen(kernel_filename, "rb"); + if (!f || !(kernel_size = get_file_size(f)) || + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + MIN(ARRAY_SIZE(header), kernel_size)) { + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + /* kernel protocol version */ +#if 0 + fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); +#endif + if (ldl_p(header+0x202) == 0x53726448) { + protocol = lduw_p(header+0x206); + } else { + /* This looks like a multiboot kernel. If it is, let's stop + treating it like a Linux kernel. */ + if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, + kernel_cmdline, kernel_size, header)) { + return; + } + protocol = 0; + } + + if (protocol < 0x200 || !(header[0x211] & 0x01)) { + /* Low kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x10000; + } else if (protocol < 0x202) { + /* High but ancient kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x100000; + } else { + /* High and recent kernel */ + real_addr = 0x10000; + cmdline_addr = 0x20000; + prot_addr = 0x100000; + } + +#if 0 + fprintf(stderr, + "qemu: real_addr = 0x" TARGET_FMT_plx "\n" + "qemu: cmdline_addr = 0x" TARGET_FMT_plx "\n" + "qemu: prot_addr = 0x" TARGET_FMT_plx "\n", + real_addr, + cmdline_addr, + prot_addr); +#endif + + /* highest address for loading the initrd */ + if (protocol >= 0x203) { + initrd_max = ldl_p(header+0x22c); + } else { + initrd_max = 0x37ffffff; + } + + if (initrd_max >= max_ram_size - acpi_data_size) { + initrd_max = max_ram_size - acpi_data_size - 1; + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + if (protocol >= 0x202) { + stl_p(header+0x228, cmdline_addr); + } else { + stw_p(header+0x20, 0xA33F); + stw_p(header+0x22, cmdline_addr-real_addr); + } + + /* handle vga= parameter */ + vmode = strstr(kernel_cmdline, "vga="); + if (vmode) { + unsigned int video_mode; + /* skip "vga=" */ + vmode += 4; + if (!strncmp(vmode, "normal", 6)) { + video_mode = 0xffff; + } else if (!strncmp(vmode, "ext", 3)) { + video_mode = 0xfffe; + } else if (!strncmp(vmode, "ask", 3)) { + video_mode = 0xfffd; + } else { + video_mode = strtol(vmode, NULL, 0); + } + stw_p(header+0x1fa, video_mode); + } + + /* loader type */ + /* High nybble = B reserved for QEMU; low nybble is revision number. + If this code is substantially changed, you may want to consider + incrementing the revision. */ + if (protocol >= 0x200) { + header[0x210] = 0xB0; + } + /* heap */ + if (protocol >= 0x201) { + header[0x211] |= 0x80; /* CAN_USE_HEAP */ + stw_p(header+0x224, cmdline_addr-real_addr-0x200); + } + + /* load initrd */ + if (initrd_filename) { + if (protocol < 0x200) { + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); + exit(1); + } + + initrd_size = get_image_size(initrd_filename); + if (initrd_size < 0) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, strerror(errno)); + exit(1); + } + + initrd_addr = (initrd_max-initrd_size) & ~4095; + + initrd_data = g_malloc(initrd_size); + load_image(initrd_filename, initrd_data); + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + + stl_p(header+0x218, initrd_addr); + stl_p(header+0x21c, initrd_size); + } + + /* load kernel and setup */ + setup_size = header[0x1f1]; + if (setup_size == 0) { + setup_size = 4; + } + setup_size = (setup_size+1)*512; + kernel_size -= setup_size; + + setup = g_malloc(setup_size); + kernel = g_malloc(kernel_size); + fseek(f, 0, SEEK_SET); + if (fread(setup, 1, setup_size, f) != setup_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + if (fread(kernel, 1, kernel_size, f) != kernel_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + fclose(f); + memcpy(setup, header, MIN(sizeof(header), setup_size)); + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + + option_rom[nb_option_roms].name = "linuxboot.bin"; + option_rom[nb_option_roms].bootindex = 0; + nb_option_roms++; +} + +#define NE2000_NB_MAX 6 + +static const int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, + 0x280, 0x380 }; +static const int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 }; + +void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd) +{ + static int nb_ne2k = 0; + + if (nb_ne2k == NE2000_NB_MAX) + return; + isa_ne2000_init(bus, ne2000_io[nb_ne2k], + ne2000_irq[nb_ne2k], nd); + nb_ne2k++; +} + +DeviceState *cpu_get_current_apic(void) +{ + if (current_cpu) { + X86CPU *cpu = X86_CPU(current_cpu); + return cpu->apic_state; + } else { + return NULL; + } +} + +void pc_acpi_smi_interrupt(void *opaque, int irq, int level) +{ + X86CPU *cpu = opaque; + + if (level) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); + } +} + +static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, + DeviceState *icc_bridge, Error **errp) +{ + X86CPU *cpu = NULL; + Error *local_err = NULL; + + if (icc_bridge == NULL) { + error_setg(&local_err, "Invalid icc-bridge value"); + goto out; + } + + cpu = cpu_x86_create(cpu_model, &local_err); + if (local_err != NULL) { + goto out; + } + + qdev_set_parent_bus(DEVICE(cpu), qdev_get_child_bus(icc_bridge, "icc")); + + object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err); + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + +out: + if (local_err) { + error_propagate(errp, local_err); + object_unref(OBJECT(cpu)); + cpu = NULL; + } + return cpu; +} + +static const char *current_cpu_model; + +void pc_hot_add_cpu(const int64_t id, Error **errp) +{ + DeviceState *icc_bridge; + X86CPU *cpu; + int64_t apic_id = x86_cpu_apic_id_from_index(id); + Error *local_err = NULL; + + if (id < 0) { + error_setg(errp, "Invalid CPU id: %" PRIi64, id); + return; + } + + if (cpu_exists(apic_id)) { + error_setg(errp, "Unable to add CPU: %" PRIi64 + ", it already exists", id); + return; + } + + if (id >= max_cpus) { + error_setg(errp, "Unable to add CPU: %" PRIi64 + ", max allowed: %d", id, max_cpus - 1); + return; + } + + if (apic_id >= ACPI_CPU_HOTPLUG_ID_LIMIT) { + error_setg(errp, "Unable to add CPU: %" PRIi64 + ", resulting APIC ID (%" PRIi64 ") is too large", + id, apic_id); + return; + } + + icc_bridge = DEVICE(object_resolve_path_type("icc-bridge", + TYPE_ICC_BRIDGE, NULL)); + cpu = pc_new_cpu(current_cpu_model, apic_id, icc_bridge, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + object_unref(OBJECT(cpu)); +} + +void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) +{ + int i; + X86CPU *cpu = NULL; + Error *error = NULL; + unsigned long apic_id_limit; + + /* init CPUs */ + if (cpu_model == NULL) { +#ifdef TARGET_X86_64 + cpu_model = "qemu64"; +#else + cpu_model = "qemu32"; +#endif + } + current_cpu_model = cpu_model; + + apic_id_limit = pc_apic_id_limit(max_cpus); + if (apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { + error_report("max_cpus is too large. APIC ID of last CPU is %lu", + apic_id_limit - 1); + exit(1); + } + + for (i = 0; i < smp_cpus; i++) { + cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), + icc_bridge, &error); + if (error) { + error_report_err(error); + exit(1); + } + object_unref(OBJECT(cpu)); + } + + /* map APIC MMIO area if CPU has APIC */ + if (cpu && cpu->apic_state) { + /* XXX: what if the base changes? */ + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0, + APIC_DEFAULT_ADDRESS, 0x1000); + } + + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); +} + +/* pci-info ROM file. Little endian format */ +typedef struct PcRomPciInfo { + uint64_t w32_min; + uint64_t w32_max; + uint64_t w64_min; + uint64_t w64_max; +} PcRomPciInfo; + +typedef struct PcGuestInfoState { + PcGuestInfo info; + Notifier machine_done; +} PcGuestInfoState; + +static +void pc_guest_info_machine_done(Notifier *notifier, void *data) +{ + PcGuestInfoState *guest_info_state = container_of(notifier, + PcGuestInfoState, + machine_done); + PCIBus *bus = find_i440fx(); + + if (bus) { + int extra_hosts = 0; + + QLIST_FOREACH(bus, &bus->child, sibling) { + /* look for expander root buses */ + if (pci_bus_is_root(bus)) { + extra_hosts++; + } + } + if (extra_hosts && guest_info_state->info.fw_cfg) { + uint64_t *val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(extra_hosts); + fw_cfg_add_file(guest_info_state->info.fw_cfg, + "etc/extra-pci-roots", val, sizeof(*val)); + } + } + + acpi_setup(&guest_info_state->info); +} + +PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size) +{ + PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); + PcGuestInfo *guest_info = &guest_info_state->info; + int i, j; + + guest_info->ram_size_below_4g = below_4g_mem_size; + guest_info->ram_size = below_4g_mem_size + above_4g_mem_size; + guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); + guest_info->apic_xrupt_override = kvm_allows_irq0_override(); + guest_info->numa_nodes = nb_numa_nodes; + guest_info->node_mem = g_malloc0(guest_info->numa_nodes * + sizeof *guest_info->node_mem); + for (i = 0; i < nb_numa_nodes; i++) { + guest_info->node_mem[i] = numa_info[i].node_mem; + } + + guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * + sizeof *guest_info->node_cpu); + + for (i = 0; i < max_cpus; i++) { + unsigned int apic_id = x86_cpu_apic_id_from_index(i); + assert(apic_id < guest_info->apic_id_limit); + for (j = 0; j < nb_numa_nodes; j++) { + if (test_bit(i, numa_info[j].node_cpu)) { + guest_info->node_cpu[apic_id] = j; + break; + } + } + } + + guest_info_state->machine_done.notify = pc_guest_info_machine_done; + qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); + return guest_info; +} + +/* setup pci memory address space mapping into system address space */ +void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory, + MemoryRegion *pci_address_space) +{ + /* Set to lower priority than RAM */ + memory_region_add_subregion_overlap(system_memory, 0x0, + pci_address_space, -1); +} + +void pc_acpi_init(const char *default_dsdt) +{ + char *filename; + + if (acpi_tables != NULL) { + /* manually set via -acpitable, leave it alone */ + return; + } + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, default_dsdt); + if (filename == NULL) { + fprintf(stderr, "WARNING: failed to find %s\n", default_dsdt); + } else { + QemuOpts *opts = qemu_opts_create(qemu_find_opts("acpi"), NULL, 0, + &error_abort); + Error *err = NULL; + + qemu_opt_set(opts, "file", filename, &error_abort); + + acpi_table_add_builtin(opts, &err); + if (err) { + error_report("WARNING: failed to load %s: %s", filename, + error_get_pretty(err)); + error_free(err); + } + g_free(filename); + } +} + +FWCfgState *xen_load_linux(const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + ram_addr_t below_4g_mem_size, + PcGuestInfo *guest_info) +{ + int i; + FWCfgState *fw_cfg; + + assert(kernel_filename != NULL); + + fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT); + rom_set_fw(fw_cfg); + + load_linux(fw_cfg, kernel_filename, initrd_filename, + kernel_cmdline, below_4g_mem_size); + for (i = 0; i < nb_option_roms; i++) { + assert(!strcmp(option_rom[i].name, "linuxboot.bin") || + !strcmp(option_rom[i].name, "multiboot.bin")); + rom_add_option(option_rom[i].name, option_rom[i].bootindex); + } + guest_info->fw_cfg = fw_cfg; + return fw_cfg; +} + +FWCfgState *pc_memory_init(MachineState *machine, + MemoryRegion *system_memory, + ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size, + MemoryRegion *rom_memory, + MemoryRegion **ram_memory, + PcGuestInfo *guest_info) +{ + int linux_boot, i; + MemoryRegion *ram, *option_rom_mr; + MemoryRegion *ram_below_4g, *ram_above_4g; + FWCfgState *fw_cfg; + PCMachineState *pcms = PC_MACHINE(machine); + + assert(machine->ram_size == below_4g_mem_size + above_4g_mem_size); + + linux_boot = (machine->kernel_filename != NULL); + + /* Allocate RAM. We allocate it as a single memory region and use + * aliases to address portions of it, mostly for backwards compatibility + * with older qemus that used qemu_ram_alloc(). + */ + ram = g_malloc(sizeof(*ram)); + memory_region_allocate_system_memory(ram, NULL, "pc.ram", + machine->ram_size); + *ram_memory = ram; + ram_below_4g = g_malloc(sizeof(*ram_below_4g)); + memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, + 0, below_4g_mem_size); + memory_region_add_subregion(system_memory, 0, ram_below_4g); + e820_add_entry(0, below_4g_mem_size, E820_RAM); + if (above_4g_mem_size > 0) { + ram_above_4g = g_malloc(sizeof(*ram_above_4g)); + memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, + below_4g_mem_size, above_4g_mem_size); + memory_region_add_subregion(system_memory, 0x100000000ULL, + ram_above_4g); + e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM); + } + + if (!guest_info->has_reserved_memory && + (machine->ram_slots || + (machine->maxram_size > machine->ram_size))) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + + error_report("\"-memory 'slots|maxmem'\" is not supported by: %s", + mc->name); + exit(EXIT_FAILURE); + } + + /* initialize hotplug memory address space */ + if (guest_info->has_reserved_memory && + (machine->ram_size < machine->maxram_size)) { + ram_addr_t hotplug_mem_size = + machine->maxram_size - machine->ram_size; + + if (machine->ram_slots > ACPI_MAX_RAM_SLOTS) { + error_report("unsupported amount of memory slots: %"PRIu64, + machine->ram_slots); + exit(EXIT_FAILURE); + } + + if (QEMU_ALIGN_UP(machine->maxram_size, + TARGET_PAGE_SIZE) != machine->maxram_size) { + error_report("maximum memory size must by aligned to multiple of " + "%d bytes", TARGET_PAGE_SIZE); + exit(EXIT_FAILURE); + } + + pcms->hotplug_memory.base = + ROUND_UP(0x100000000ULL + above_4g_mem_size, 1ULL << 30); + + if (pcms->enforce_aligned_dimm) { + /* size hotplug region assuming 1G page max alignment per slot */ + hotplug_mem_size += (1ULL << 30) * machine->ram_slots; + } + + if ((pcms->hotplug_memory.base + hotplug_mem_size) < + hotplug_mem_size) { + error_report("unsupported amount of maximum memory: " RAM_ADDR_FMT, + machine->maxram_size); + exit(EXIT_FAILURE); + } + + memory_region_init(&pcms->hotplug_memory.mr, OBJECT(pcms), + "hotplug-memory", hotplug_mem_size); + memory_region_add_subregion(system_memory, pcms->hotplug_memory.base, + &pcms->hotplug_memory.mr); + } + + /* Initialize PC system firmware */ + pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw); + + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_abort); + vmstate_register_ram_global(option_rom_mr); + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, + option_rom_mr, + 1); + + fw_cfg = bochs_bios_init(); + rom_set_fw(fw_cfg); + + if (guest_info->has_reserved_memory && pcms->hotplug_memory.base) { + uint64_t *val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(ROUND_UP(pcms->hotplug_memory.base, 0x1ULL << 30)); + fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); + } + + if (linux_boot) { + load_linux(fw_cfg, machine->kernel_filename, machine->initrd_filename, + machine->kernel_cmdline, below_4g_mem_size); + } + + for (i = 0; i < nb_option_roms; i++) { + rom_add_option(option_rom[i].name, option_rom[i].bootindex); + } + guest_info->fw_cfg = fw_cfg; + return fw_cfg; +} + +qemu_irq pc_allocate_cpu_irq(void) +{ + return qemu_allocate_irq(pic_irq_request, NULL, 0); +} + +DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) +{ + DeviceState *dev = NULL; + + if (pci_bus) { + PCIDevice *pcidev = pci_vga_init(pci_bus); + dev = pcidev ? &pcidev->qdev : NULL; + } else if (isa_bus) { + ISADevice *isadev = isa_vga_init(isa_bus); + dev = isadev ? DEVICE(isadev) : NULL; + } + return dev; +} + +static void cpu_request_exit(void *opaque, int irq, int level) +{ + CPUState *cpu = current_cpu; + + if (cpu && level) { + cpu_exit(cpu); + } +} + +static const MemoryRegionOps ioport80_io_ops = { + .write = ioport80_write, + .read = ioport80_read, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static const MemoryRegionOps ioportF0_io_ops = { + .write = ioportF0_write, + .read = ioportF0_read, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, + ISADevice **rtc_state, + bool create_fdctrl, + bool no_vmport, + uint32 hpet_irqs) +{ + int i; + DriveInfo *fd[MAX_FD]; + DeviceState *hpet = NULL; + int pit_isa_irq = 0; + qemu_irq pit_alt_irq = NULL; + qemu_irq rtc_irq = NULL; + qemu_irq *a20_line; + ISADevice *i8042, *port92, *vmmouse, *pit = NULL; + qemu_irq *cpu_exit_irq; + MemoryRegion *ioport80_io = g_new(MemoryRegion, 1); + MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1); + + memory_region_init_io(ioport80_io, NULL, &ioport80_io_ops, NULL, "ioport80", 1); + memory_region_add_subregion(isa_bus->address_space_io, 0x80, ioport80_io); + + memory_region_init_io(ioportF0_io, NULL, &ioportF0_io_ops, NULL, "ioportF0", 1); + memory_region_add_subregion(isa_bus->address_space_io, 0xf0, ioportF0_io); + + /* + * Check if an HPET shall be created. + * + * Without KVM_CAP_PIT_STATE2, we cannot switch off the in-kernel PIT + * when the HPET wants to take over. Thus we have to disable the latter. + */ + if (!no_hpet && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) { + /* In order to set property, here not using sysbus_try_create_simple */ + hpet = qdev_try_create(NULL, TYPE_HPET); + if (hpet) { + /* For pc-piix-*, hpet's intcap is always IRQ2. For pc-q35-1.7 + * and earlier, use IRQ2 for compat. Otherwise, use IRQ16~23, + * IRQ8 and IRQ2. + */ + uint8_t compat = object_property_get_int(OBJECT(hpet), + HPET_INTCAP, NULL); + if (!compat) { + qdev_prop_set_uint32(hpet, HPET_INTCAP, hpet_irqs); + } + qdev_init_nofail(hpet); + sysbus_mmio_map(SYS_BUS_DEVICE(hpet), 0, HPET_BASE); + + for (i = 0; i < GSI_NUM_PINS; i++) { + sysbus_connect_irq(SYS_BUS_DEVICE(hpet), i, gsi[i]); + } + pit_isa_irq = -1; + pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); + rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); + } + } + *rtc_state = rtc_init(isa_bus, 2000, rtc_irq); + + qemu_register_boot_set(pc_boot_set, *rtc_state); + + if (!xen_enabled()) { + if (kvm_irqchip_in_kernel()) { + pit = kvm_pit_init(isa_bus, 0x40); + } else { + pit = pit_init(isa_bus, 0x40, pit_isa_irq, pit_alt_irq); + } + if (hpet) { + /* connect PIT to output control line of the HPET */ + qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0)); + } + pcspk_init(isa_bus, pit); + } + + serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); + + a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2); + i8042 = isa_create_simple(isa_bus, "i8042"); + i8042_setup_a20_line(i8042, &a20_line[0]); + if (!no_vmport) { + vmport_init(isa_bus); + vmmouse = isa_try_create(isa_bus, "vmmouse"); + } else { + vmmouse = NULL; + } + if (vmmouse) { + DeviceState *dev = DEVICE(vmmouse); + qdev_prop_set_ptr(dev, "ps2_mouse", i8042); + qdev_init_nofail(dev); + } + port92 = isa_create_simple(isa_bus, "port92"); + port92_init(port92, &a20_line[1]); + + cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1); + DMA_init(0, cpu_exit_irq); + + for(i = 0; i < MAX_FD; i++) { + fd[i] = drive_get(IF_FLOPPY, 0, i); + create_fdctrl |= !!fd[i]; + } + if (create_fdctrl) { + fdctrl_init_isa(isa_bus, fd); + } +} + +void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus) +{ + int i; + + for (i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + + if (!pci_bus || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) { + pc_init_ne2k_isa(isa_bus, nd); + } else { + pci_nic_init_nofail(nd, pci_bus, "e1000", NULL); + } + } +} + +void pc_pci_device_init(PCIBus *pci_bus) +{ + int max_bus; + int bus; + + max_bus = drive_get_max_bus(IF_SCSI); + for (bus = 0; bus <= max_bus; bus++) { + pci_create_simple(pci_bus, -1, "lsi53c895a"); + } +} + +void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) +{ + DeviceState *dev; + SysBusDevice *d; + unsigned int i; + + if (kvm_irqchip_in_kernel()) { + dev = qdev_create(NULL, "kvm-ioapic"); + } else { + dev = qdev_create(NULL, "ioapic"); + } + if (parent_name) { + object_property_add_child(object_resolve_path(parent_name, NULL), + "ioapic", OBJECT(dev), NULL); + } + qdev_init_nofail(dev); + d = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); + } +} + +static void pc_dimm_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t align = TARGET_PAGE_SIZE; + + if (memory_region_get_alignment(mr) && pcms->enforce_aligned_dimm) { + align = memory_region_get_alignment(mr); + } + + if (!pcms->acpi_dev) { + error_setg(&local_err, + "memory hotplug is not enabled: missing acpi device"); + goto out; + } + + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); + if (local_err) { + goto out; + } + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); +out: + error_propagate(errp, local_err); +} + +static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (!pcms->acpi_dev) { + error_setg(&local_err, + "memory hotplug is not enabled: missing acpi device"); + goto out; + } + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + +out: + error_propagate(errp, local_err); +} + +static void pc_dimm_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + HotplugHandlerClass *hhc; + Error *local_err = NULL; + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + + if (local_err) { + goto out; + } + + pc_dimm_memory_unplug(dev, &pcms->hotplug_memory, mr); + object_unparent(OBJECT(dev)); + + out: + error_propagate(errp, local_err); +} + +static void pc_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandlerClass *hhc; + Error *local_err = NULL; + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (!dev->hotplugged) { + goto out; + } + + if (!pcms->acpi_dev) { + error_setg(&local_err, + "cpu hotplug is not enabled: missing acpi device"); + goto out; + } + + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); + hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + if (local_err) { + goto out; + } + + /* increment the number of CPUs */ + rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); +out: + error_propagate(errp, local_err); +} + +static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + pc_cpu_plug(hotplug_dev, dev, errp); + } +} + +static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_unplug_request(hotplug_dev, dev, errp); + } else { + error_setg(errp, "acpi: device unplug request for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } +} + +static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + pc_dimm_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "acpi: device unplug for not supported device" + " type: %s", object_get_typename(OBJECT(dev))); + } +} + +static HotplugHandler *pc_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + + return pcmc->get_hotplug_handler ? + pcmc->get_hotplug_handler(machine, dev) : NULL; +} + +static void +pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + int64_t value = memory_region_size(&pcms->hotplug_memory.mr); + + visit_type_int(v, &value, name, errp); +} + +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + uint64_t value = pcms->max_ram_below_4g; + + visit_type_size(v, &value, name, errp); +} + +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + Error *error = NULL; + uint64_t value; + + visit_type_size(v, &value, name, &error); + if (error) { + error_propagate(errp, error); + return; + } + if (value > (1ULL << 32)) { + error_set(&error, ERROR_CLASS_GENERIC_ERROR, + "Machine option 'max-ram-below-4g=%"PRIu64 + "' expects size less than or equal to 4G", value); + error_propagate(errp, error); + return; + } + + if (value < (1ULL << 20)) { + error_report("Warning: small max_ram_below_4g(%"PRIu64 + ") less than 1M. BIOS may not work..", + value); + } + + pcms->max_ram_below_4g = value; +} + +static void pc_machine_get_vmport(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + OnOffAuto vmport = pcms->vmport; + + visit_type_OnOffAuto(v, &vmport, name, errp); +} + +static void pc_machine_set_vmport(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + visit_type_OnOffAuto(v, &pcms->vmport, name, errp); +} + +bool pc_machine_is_smm_enabled(PCMachineState *pcms) +{ + bool smm_available = false; + + if (pcms->smm == ON_OFF_AUTO_OFF) { + return false; + } + + if (tcg_enabled() || qtest_enabled()) { + smm_available = true; + } else if (kvm_enabled()) { + smm_available = kvm_has_smm(); + } + + if (smm_available) { + return true; + } + + if (pcms->smm == ON_OFF_AUTO_ON) { + error_report("System Management Mode not supported by this hypervisor."); + exit(1); + } + return false; +} + +static void pc_machine_get_smm(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + OnOffAuto smm = pcms->smm; + + visit_type_OnOffAuto(v, &smm, name, errp); +} + +static void pc_machine_set_smm(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + visit_type_OnOffAuto(v, &pcms->smm, name, errp); +} + +static bool pc_machine_get_aligned_dimm(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->enforce_aligned_dimm; +} + +static void pc_machine_initfn(Object *obj) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", + pc_machine_get_hotplug_memory_region_size, + NULL, NULL, NULL, NULL); + + pcms->max_ram_below_4g = 1ULL << 32; /* 4G */ + object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, + pc_machine_set_max_ram_below_4g, + NULL, NULL, NULL); + object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G, + "Maximum ram below the 4G boundary (32bit boundary)", + NULL); + + pcms->smm = ON_OFF_AUTO_AUTO; + object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto", + pc_machine_get_smm, + pc_machine_set_smm, + NULL, NULL, NULL); + object_property_set_description(obj, PC_MACHINE_SMM, + "Enable SMM (pc & q35)", + NULL); + + pcms->vmport = ON_OFF_AUTO_AUTO; + object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto", + pc_machine_get_vmport, + pc_machine_set_vmport, + NULL, NULL, NULL); + object_property_set_description(obj, PC_MACHINE_VMPORT, + "Enable vmport (pc & q35)", + NULL); + + pcms->enforce_aligned_dimm = true; + object_property_add_bool(obj, PC_MACHINE_ENFORCE_ALIGNED_DIMM, + pc_machine_get_aligned_dimm, + NULL, NULL); +} + +static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) +{ + unsigned pkg_id, core_id, smt_id; + x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, + &pkg_id, &core_id, &smt_id); + return pkg_id; +} + +static void pc_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + pcmc->get_hotplug_handler = mc->get_hotplug_handler; + mc->get_hotplug_handler = pc_get_hotpug_handler; + mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; + hc->plug = pc_machine_device_plug_cb; + hc->unplug_request = pc_machine_device_unplug_request_cb; + hc->unplug = pc_machine_device_unplug_cb; +} + +static const TypeInfo pc_machine_info = { + .name = TYPE_PC_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PCMachineState), + .instance_init = pc_machine_initfn, + .class_size = sizeof(PCMachineClass), + .class_init = pc_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + }, +}; + +static void pc_machine_register_types(void) +{ + type_register_static(&pc_machine_info); +} + +type_init(pc_machine_register_types) diff --git a/qemu/hw/i386/pc_piix.c b/qemu/hw/i386/pc_piix.c new file mode 100644 index 000000000..a896624f8 --- /dev/null +++ b/qemu/hw/i386/pc_piix.c @@ -0,0 +1,933 @@ +/* + * QEMU PC System Emulator + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <glib.h> + +#include "hw/hw.h" +#include "hw/loader.h" +#include "hw/i386/pc.h" +#include "hw/i386/apic.h" +#include "hw/i386/smbios.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_ids.h" +#include "hw/usb.h" +#include "net/net.h" +#include "hw/boards.h" +#include "hw/ide.h" +#include "sysemu/kvm.h" +#include "hw/kvm/clock.h" +#include "sysemu/sysemu.h" +#include "hw/sysbus.h" +#include "hw/cpu/icc_bus.h" +#include "sysemu/arch_init.h" +#include "sysemu/block-backend.h" +#include "hw/i2c/smbus.h" +#include "hw/xen/xen.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "hw/acpi/acpi.h" +#include "cpu.h" +#include "qemu/error-report.h" +#ifdef CONFIG_XEN +# include <xen/hvm/hvm_info_table.h> +#endif +#include "migration/migration.h" + +#define MAX_IDE_BUS 2 + +static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 }; +static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; +static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; + +static bool pci_enabled = true; +static bool has_acpi_build = true; +static bool rsdp_in_ram = true; +static int legacy_acpi_table_size; +static bool smbios_defaults = true; +static bool smbios_legacy_mode; +static bool smbios_uuid_encoded = true; +/* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to + * host addresses aligned at 1Gbyte boundaries. This way we can use 1GByte + * pages in the host. + */ +static bool gigabyte_align = true; +static bool has_reserved_memory = true; +static bool kvmclock_enabled = true; + +/* PC hardware initialisation */ +static void pc_init1(MachineState *machine) +{ + PCMachineState *pc_machine = PC_MACHINE(machine); + MemoryRegion *system_memory = get_system_memory(); + MemoryRegion *system_io = get_system_io(); + int i; + ram_addr_t below_4g_mem_size, above_4g_mem_size; + PCIBus *pci_bus; + ISABus *isa_bus; + PCII440FXState *i440fx_state; + int piix3_devfn = -1; + qemu_irq *gsi; + qemu_irq *i8259; + qemu_irq smi_irq; + GSIState *gsi_state; + DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + BusState *idebus[MAX_IDE_BUS]; + ISADevice *rtc_state; + MemoryRegion *ram_memory; + MemoryRegion *pci_memory; + MemoryRegion *rom_memory; + DeviceState *icc_bridge; + PcGuestInfo *guest_info; + ram_addr_t lowmem; + + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory). + * If it doesn't, we need to split it in chunks below and above 4G. + * In any case, try to make sure that guest addresses aligned at + * 1G boundaries get mapped to host addresses aligned at 1G boundaries. + * For old machine types, use whatever split we used historically to avoid + * breaking migration. + */ + if (machine->ram_size >= 0xe0000000) { + lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000; + } else { + lowmem = 0xe0000000; + } + + /* Handle the machine opt max-ram-below-4g. It is basically doing + * min(qemu limit, user limit). + */ + if (lowmem > pc_machine->max_ram_below_4g) { + lowmem = pc_machine->max_ram_below_4g; + if (machine->ram_size - lowmem > lowmem && + lowmem & ((1ULL << 30) - 1)) { + error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 + ") not a multiple of 1G; possible bad performance.", + pc_machine->max_ram_below_4g); + } + } + + if (machine->ram_size >= lowmem) { + above_4g_mem_size = machine->ram_size - lowmem; + below_4g_mem_size = lowmem; + } else { + above_4g_mem_size = 0; + below_4g_mem_size = machine->ram_size; + } + + if (xen_enabled() && xen_hvm_init(&below_4g_mem_size, &above_4g_mem_size, + &ram_memory) != 0) { + fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); + exit(1); + } + + icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); + object_property_add_child(qdev_get_machine(), "icc-bridge", + OBJECT(icc_bridge), NULL); + + pc_cpus_init(machine->cpu_model, icc_bridge); + + if (kvm_enabled() && kvmclock_enabled) { + kvmclock_create(); + } + + if (pci_enabled) { + pci_memory = g_new(MemoryRegion, 1); + memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); + rom_memory = pci_memory; + } else { + pci_memory = NULL; + rom_memory = system_memory; + } + + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); + + guest_info->has_acpi_build = has_acpi_build; + guest_info->legacy_acpi_table_size = legacy_acpi_table_size; + + guest_info->isapc_ram_fw = !pci_enabled; + guest_info->has_reserved_memory = has_reserved_memory; + guest_info->rsdp_in_ram = rsdp_in_ram; + + if (smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, smbios_legacy_mode, smbios_uuid_encoded); + } + + /* allocate ram and load rom/bios */ + if (!xen_enabled()) { + pc_memory_init(machine, system_memory, + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); + } else if (machine->kernel_filename != NULL) { + /* For xen HVM direct kernel boot, load linux here */ + xen_load_linux(machine->kernel_filename, + machine->kernel_cmdline, + machine->initrd_filename, + below_4g_mem_size, + guest_info); + } + + gsi_state = g_malloc0(sizeof(*gsi_state)); + if (kvm_irqchip_in_kernel()) { + kvm_pc_setup_irq_routing(pci_enabled); + gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + GSI_NUM_PINS); + } else { + gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + } + + if (pci_enabled) { + pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, + system_memory, system_io, machine->ram_size, + below_4g_mem_size, + above_4g_mem_size, + pci_memory, ram_memory); + } else { + pci_bus = NULL; + i440fx_state = NULL; + isa_bus = isa_bus_new(NULL, get_system_memory(), system_io); + no_hpet = 1; + } + isa_bus_irqs(isa_bus, gsi); + + if (kvm_irqchip_in_kernel()) { + i8259 = kvm_i8259_init(isa_bus); + } else if (xen_enabled()) { + i8259 = xen_interrupt_controller_init(); + } else { + i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq()); + } + + for (i = 0; i < ISA_NUM_IRQS; i++) { + gsi_state->i8259_irq[i] = i8259[i]; + } + g_free(i8259); + if (pci_enabled) { + ioapic_init_gsi(gsi_state, "i440fx"); + } + qdev_init_nofail(icc_bridge); + + pc_register_ferr_irq(gsi[13]); + + pc_vga_init(isa_bus, pci_enabled ? pci_bus : NULL); + + assert(pc_machine->vmport != ON_OFF_AUTO_MAX); + if (pc_machine->vmport == ON_OFF_AUTO_AUTO) { + pc_machine->vmport = xen_enabled() ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; + } + + /* init basic PC hardware */ + pc_basic_device_init(isa_bus, gsi, &rtc_state, true, + (pc_machine->vmport != ON_OFF_AUTO_ON), 0x4); + + pc_nic_init(isa_bus, pci_bus); + + ide_drive_get(hd, ARRAY_SIZE(hd)); + if (pci_enabled) { + PCIDevice *dev; + if (xen_enabled()) { + dev = pci_piix3_xen_ide_init(pci_bus, hd, piix3_devfn + 1); + } else { + dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1); + } + idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0"); + idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1"); + } else { + for(i = 0; i < MAX_IDE_BUS; i++) { + ISADevice *dev; + char busname[] = "ide.0"; + dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], + ide_irq[i], + hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]); + /* + * The ide bus name is ide.0 for the first bus and ide.1 for the + * second one. + */ + busname[4] = '0' + i; + idebus[i] = qdev_get_child_bus(DEVICE(dev), busname); + } + } + + pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, + machine, idebus[0], idebus[1], rtc_state); + + if (pci_enabled && usb_enabled()) { + pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci"); + } + + if (pci_enabled && acpi_enabled) { + DeviceState *piix4_pm; + I2CBus *smbus; + + smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); + /* TODO: Populate SPD eeprom data. */ + smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, + gsi[9], smi_irq, + pc_machine_is_smm_enabled(pc_machine), + &piix4_pm); + smbus_eeprom_init(smbus, 8, NULL, 0); + + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(piix4_pm), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); + } + + if (pci_enabled) { + pc_pci_device_init(pci_bus); + } +} + +static void pc_compat_2_3(MachineState *machine) +{ + PCMachineState *pcms = PC_MACHINE(machine); + savevm_skip_section_footers(); + if (kvm_enabled()) { + pcms->smm = ON_OFF_AUTO_OFF; + } + global_state_set_optional(); + savevm_skip_configuration(); +} + +static void pc_compat_2_2(MachineState *machine) +{ + pc_compat_2_3(machine); + rsdp_in_ram = false; + x86_cpu_compat_set_features("kvm64", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("kvm32", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Conroe", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Penryn", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Nehalem", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Westmere", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("SandyBridge", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Broadwell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G1", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G2", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + machine->suppress_vmdesc = true; +} + +static void pc_compat_2_1(MachineState *machine) +{ + PCMachineState *pcms = PC_MACHINE(machine); + + pc_compat_2_2(machine); + smbios_uuid_encoded = false; + x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0); + x86_cpu_compat_set_features("core2duo", FEAT_1_ECX, CPUID_EXT_VMX, 0); + x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM); + pcms->enforce_aligned_dimm = false; +} + +static void pc_compat_2_0(MachineState *machine) +{ + pc_compat_2_1(machine); + /* This value depends on the actual DSDT and SSDT compiled into + * the source QEMU; unfortunately it depends on the binary and + * not on the machine type, so we cannot make pc-i440fx-1.7 work on + * both QEMU 1.7 and QEMU 2.0. + * + * Large variations cause migration to fail for more than one + * consecutive value of the "-smp" maxcpus option. + * + * For small variations of the kind caused by different iasl versions, + * the 4k rounding usually leaves slack. However, there could be still + * one or two values that break. For QEMU 1.7 and QEMU 2.0 the + * slack is only ~10 bytes before one "-smp maxcpus" value breaks! + * + * 6652 is valid for QEMU 2.0, the right value for pc-i440fx-1.7 on + * QEMU 1.7 it is 6414. For RHEL/CentOS 7.0 it is 6418. + */ + legacy_acpi_table_size = 6652; + smbios_legacy_mode = true; + has_reserved_memory = false; + pc_set_legacy_acpi_data_size(); +} + +static void pc_compat_1_7(MachineState *machine) +{ + pc_compat_2_0(machine); + smbios_defaults = false; + gigabyte_align = false; + option_rom_has_mr = true; + legacy_acpi_table_size = 6414; + x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC); +} + +static void pc_compat_1_6(MachineState *machine) +{ + pc_compat_1_7(machine); + rom_file_has_mr = false; + has_acpi_build = false; +} + +static void pc_compat_1_5(MachineState *machine) +{ + pc_compat_1_6(machine); +} + +static void pc_compat_1_4(MachineState *machine) +{ + pc_compat_1_5(machine); + x86_cpu_compat_set_features("n270", FEAT_1_ECX, 0, CPUID_EXT_MOVBE); + x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ); +} + +static void pc_compat_1_3(MachineState *machine) +{ + pc_compat_1_4(machine); + enable_compat_apic_id_mode(); +} + +/* PC compat function for pc-0.14 to pc-1.2 */ +static void pc_compat_1_2(MachineState *machine) +{ + pc_compat_1_3(machine); + x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); +} + +/* PC compat function for pc-0.10 to pc-0.13 */ +static void pc_compat_0_13(MachineState *machine) +{ + pc_compat_1_2(machine); + kvmclock_enabled = false; +} + +static void pc_init_isa(MachineState *machine) +{ + pci_enabled = false; + has_acpi_build = false; + smbios_defaults = false; + gigabyte_align = false; + smbios_legacy_mode = true; + has_reserved_memory = false; + option_rom_has_mr = true; + rom_file_has_mr = false; + if (!machine->cpu_model) { + machine->cpu_model = "486"; + } + x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); + enable_compat_apic_id_mode(); + pc_init1(machine); +} + +#ifdef CONFIG_XEN +static void pc_xen_hvm_init(MachineState *machine) +{ + PCIBus *bus; + + pc_init1(machine); + + bus = pci_find_primary_bus(); + if (bus != NULL) { + pci_create_simple(bus, -1, "xen-platform"); + } +} +#endif + +#define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ + void (*compat)(MachineState *m) = (compatfn); \ + if (compat) { \ + compat(machine); \ + } \ + pc_init1(machine); \ + } \ + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + +static void pc_i440fx_machine_options(MachineClass *m) +{ + pc_default_machine_options(m); + m->family = "pc_piix"; + m->desc = "Standard PC (i440FX + PIIX, 1996)"; + m->hot_add_cpu = pc_hot_add_cpu; +} + +static void pc_i440fx_2_4_machine_options(MachineClass *m) +{ + pc_i440fx_machine_options(m); + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->alias = "pc"; + m->is_default = 1; +} + +DEFINE_I440FX_MACHINE(v2_4, "pc-i440fx-2.4", NULL, + pc_i440fx_2_4_machine_options) + + +static void pc_i440fx_2_3_machine_options(MachineClass *m) +{ + pc_i440fx_2_4_machine_options(m); + m->alias = NULL; + m->is_default = 0; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); +} + +DEFINE_I440FX_MACHINE(v2_3, "pc-i440fx-2.3", pc_compat_2_3, + pc_i440fx_2_3_machine_options); + + +static void pc_i440fx_2_2_machine_options(MachineClass *m) +{ + pc_i440fx_2_3_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_2_2); +} + +DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2, + pc_i440fx_2_2_machine_options); + + +static void pc_i440fx_2_1_machine_options(MachineClass *m) +{ + pc_i440fx_2_2_machine_options(m); + m->default_display = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_1); +} + +DEFINE_I440FX_MACHINE(v2_1, "pc-i440fx-2.1", pc_compat_2_1, + pc_i440fx_2_1_machine_options); + + + +static void pc_i440fx_2_0_machine_options(MachineClass *m) +{ + pc_i440fx_2_1_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_2_0); +} + +DEFINE_I440FX_MACHINE(v2_0, "pc-i440fx-2.0", pc_compat_2_0, + pc_i440fx_2_0_machine_options); + + +static void pc_i440fx_1_7_machine_options(MachineClass *m) +{ + pc_i440fx_2_0_machine_options(m); + m->default_machine_opts = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_1_7); +} + +DEFINE_I440FX_MACHINE(v1_7, "pc-i440fx-1.7", pc_compat_1_7, + pc_i440fx_1_7_machine_options); + + +static void pc_i440fx_1_6_machine_options(MachineClass *m) +{ + pc_i440fx_1_7_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_6); +} + +DEFINE_I440FX_MACHINE(v1_6, "pc-i440fx-1.6", pc_compat_1_6, + pc_i440fx_1_6_machine_options); + + +static void pc_i440fx_1_5_machine_options(MachineClass *m) +{ + pc_i440fx_1_6_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_5); +} + +DEFINE_I440FX_MACHINE(v1_5, "pc-i440fx-1.5", pc_compat_1_5, + pc_i440fx_1_5_machine_options); + + +static void pc_i440fx_1_4_machine_options(MachineClass *m) +{ + pc_i440fx_1_5_machine_options(m); + m->hot_add_cpu = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_1_4); +} + +DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4, + pc_i440fx_1_4_machine_options); + + +#define PC_COMPAT_1_3 \ + PC_COMPAT_1_4 \ + {\ + .driver = "usb-tablet",\ + .property = "usb_version",\ + .value = stringify(1),\ + },{\ + .driver = "virtio-net-pci",\ + .property = "ctrl_mac_addr",\ + .value = "off", \ + },{ \ + .driver = "virtio-net-pci", \ + .property = "mq", \ + .value = "off", \ + }, {\ + .driver = "e1000",\ + .property = "autonegotiation",\ + .value = "off",\ + }, + + +static void pc_i440fx_1_3_machine_options(MachineClass *m) +{ + pc_i440fx_1_4_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_3); +} + +DEFINE_I440FX_MACHINE(v1_3, "pc-1.3", pc_compat_1_3, + pc_i440fx_1_3_machine_options); + + +#define PC_COMPAT_1_2 \ + PC_COMPAT_1_3 \ + {\ + .driver = "nec-usb-xhci",\ + .property = "msi",\ + .value = "off",\ + },{\ + .driver = "nec-usb-xhci",\ + .property = "msix",\ + .value = "off",\ + },{\ + .driver = "ivshmem",\ + .property = "use64",\ + .value = "0",\ + },{\ + .driver = "qxl",\ + .property = "revision",\ + .value = stringify(3),\ + },{\ + .driver = "qxl-vga",\ + .property = "revision",\ + .value = stringify(3),\ + },{\ + .driver = "VGA",\ + .property = "mmio",\ + .value = "off",\ + }, + +static void pc_i440fx_1_2_machine_options(MachineClass *m) +{ + pc_i440fx_1_3_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_2); +} + +DEFINE_I440FX_MACHINE(v1_2, "pc-1.2", pc_compat_1_2, + pc_i440fx_1_2_machine_options); + + +#define PC_COMPAT_1_1 \ + PC_COMPAT_1_2 \ + {\ + .driver = "virtio-scsi-pci",\ + .property = "hotplug",\ + .value = "off",\ + },{\ + .driver = "virtio-scsi-pci",\ + .property = "param_change",\ + .value = "off",\ + },{\ + .driver = "VGA",\ + .property = "vgamem_mb",\ + .value = stringify(8),\ + },{\ + .driver = "vmware-svga",\ + .property = "vgamem_mb",\ + .value = stringify(8),\ + },{\ + .driver = "qxl-vga",\ + .property = "vgamem_mb",\ + .value = stringify(8),\ + },{\ + .driver = "qxl",\ + .property = "vgamem_mb",\ + .value = stringify(8),\ + },{\ + .driver = "virtio-blk-pci",\ + .property = "config-wce",\ + .value = "off",\ + }, + +static void pc_i440fx_1_1_machine_options(MachineClass *m) +{ + pc_i440fx_1_2_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_1); +} + +DEFINE_I440FX_MACHINE(v1_1, "pc-1.1", pc_compat_1_2, + pc_i440fx_1_1_machine_options); + + +#define PC_COMPAT_1_0 \ + PC_COMPAT_1_1 \ + {\ + .driver = TYPE_ISA_FDC,\ + .property = "check_media_rate",\ + .value = "off",\ + }, {\ + .driver = "virtio-balloon-pci",\ + .property = "class",\ + .value = stringify(PCI_CLASS_MEMORY_RAM),\ + },{\ + .driver = "apic-common",\ + .property = "vapic",\ + .value = "off",\ + },{\ + .driver = TYPE_USB_DEVICE,\ + .property = "full-path",\ + .value = "no",\ + }, + +static void pc_i440fx_1_0_machine_options(MachineClass *m) +{ + pc_i440fx_1_1_machine_options(m); + m->hw_version = "1.0"; + SET_MACHINE_COMPAT(m, PC_COMPAT_1_0); +} + +DEFINE_I440FX_MACHINE(v1_0, "pc-1.0", pc_compat_1_2, + pc_i440fx_1_0_machine_options); + + +#define PC_COMPAT_0_15 \ + PC_COMPAT_1_0 + +static void pc_i440fx_0_15_machine_options(MachineClass *m) +{ + pc_i440fx_1_0_machine_options(m); + m->hw_version = "0.15"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_15); +} + +DEFINE_I440FX_MACHINE(v0_15, "pc-0.15", pc_compat_1_2, + pc_i440fx_0_15_machine_options); + + +#define PC_COMPAT_0_14 \ + PC_COMPAT_0_15 \ + {\ + .driver = "virtio-blk-pci",\ + .property = "event_idx",\ + .value = "off",\ + },{\ + .driver = "virtio-serial-pci",\ + .property = "event_idx",\ + .value = "off",\ + },{\ + .driver = "virtio-net-pci",\ + .property = "event_idx",\ + .value = "off",\ + },{\ + .driver = "virtio-balloon-pci",\ + .property = "event_idx",\ + .value = "off",\ + },{\ + .driver = "qxl",\ + .property = "revision",\ + .value = stringify(2),\ + },{\ + .driver = "qxl-vga",\ + .property = "revision",\ + .value = stringify(2),\ + }, + +static void pc_i440fx_0_14_machine_options(MachineClass *m) +{ + pc_i440fx_0_15_machine_options(m); + m->hw_version = "0.14"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_14); +} + +DEFINE_I440FX_MACHINE(v0_14, "pc-0.14", pc_compat_1_2, + pc_i440fx_0_14_machine_options); + + +#define PC_COMPAT_0_13 \ + PC_COMPAT_0_14 \ + {\ + .driver = TYPE_PCI_DEVICE,\ + .property = "command_serr_enable",\ + .value = "off",\ + },{\ + .driver = "AC97",\ + .property = "use_broken_id",\ + .value = stringify(1),\ + },{\ + .driver = "virtio-9p-pci",\ + .property = "vectors",\ + .value = stringify(0),\ + },{\ + .driver = "VGA",\ + .property = "rombar",\ + .value = stringify(0),\ + },{\ + .driver = "vmware-svga",\ + .property = "rombar",\ + .value = stringify(0),\ + }, + +static void pc_i440fx_0_13_machine_options(MachineClass *m) +{ + pc_i440fx_0_14_machine_options(m); + m->hw_version = "0.13"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_13); +} + +DEFINE_I440FX_MACHINE(v0_13, "pc-0.13", pc_compat_0_13, + pc_i440fx_0_13_machine_options); + + +#define PC_COMPAT_0_12 \ + PC_COMPAT_0_13 \ + {\ + .driver = "virtio-serial-pci",\ + .property = "max_ports",\ + .value = stringify(1),\ + },{\ + .driver = "virtio-serial-pci",\ + .property = "vectors",\ + .value = stringify(0),\ + },{\ + .driver = "usb-mouse",\ + .property = "serial",\ + .value = "1",\ + },{\ + .driver = "usb-tablet",\ + .property = "serial",\ + .value = "1",\ + },{\ + .driver = "usb-kbd",\ + .property = "serial",\ + .value = "1",\ + }, + +static void pc_i440fx_0_12_machine_options(MachineClass *m) +{ + pc_i440fx_0_13_machine_options(m); + m->hw_version = "0.12"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_12); +} + +DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13, + pc_i440fx_0_12_machine_options); + + +#define PC_COMPAT_0_11 \ + PC_COMPAT_0_12 \ + {\ + .driver = "virtio-blk-pci",\ + .property = "vectors",\ + .value = stringify(0),\ + },{\ + .driver = TYPE_PCI_DEVICE,\ + .property = "rombar",\ + .value = stringify(0),\ + },{\ + .driver = "ide-drive",\ + .property = "ver",\ + .value = "0.11",\ + },{\ + .driver = "scsi-disk",\ + .property = "ver",\ + .value = "0.11",\ + }, + +static void pc_i440fx_0_11_machine_options(MachineClass *m) +{ + pc_i440fx_0_12_machine_options(m); + m->hw_version = "0.11"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_11); +} + +DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13, + pc_i440fx_0_11_machine_options); + + +#define PC_COMPAT_0_10 \ + PC_COMPAT_0_11 \ + {\ + .driver = "virtio-blk-pci",\ + .property = "class",\ + .value = stringify(PCI_CLASS_STORAGE_OTHER),\ + },{\ + .driver = "virtio-serial-pci",\ + .property = "class",\ + .value = stringify(PCI_CLASS_DISPLAY_OTHER),\ + },{\ + .driver = "virtio-net-pci",\ + .property = "vectors",\ + .value = stringify(0),\ + },{\ + .driver = "ide-drive",\ + .property = "ver",\ + .value = "0.10",\ + },{\ + .driver = "scsi-disk",\ + .property = "ver",\ + .value = "0.10",\ + }, + +static void pc_i440fx_0_10_machine_options(MachineClass *m) +{ + pc_i440fx_0_11_machine_options(m); + m->hw_version = "0.10"; + SET_MACHINE_COMPAT(m, PC_COMPAT_0_10); +} + +DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, + pc_i440fx_0_10_machine_options); + + +static void isapc_machine_options(MachineClass *m) +{ + pc_common_machine_options(m); + m->desc = "ISA-only PC"; + m->max_cpus = 1; +} + +DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa, + isapc_machine_options); + + +#ifdef CONFIG_XEN +static void xenfv_machine_options(MachineClass *m) +{ + pc_common_machine_options(m); + m->desc = "Xen Fully-virtualized PC"; + m->max_cpus = HVM_MAX_VCPUS; + m->default_machine_opts = "accel=xen"; + m->hot_add_cpu = pc_hot_add_cpu; +} + +DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, + xenfv_machine_options); +#endif diff --git a/qemu/hw/i386/pc_q35.c b/qemu/hw/i386/pc_q35.c new file mode 100644 index 000000000..974aead5a --- /dev/null +++ b/qemu/hw/i386/pc_q35.c @@ -0,0 +1,492 @@ +/* + * Q35 chipset based pc system emulator + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2009, 2010 + * Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * Copyright (C) 2012 Jason Baron <jbaron@redhat.com> + * + * This is based on pc.c, but heavily modified. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw/hw.h" +#include "hw/loader.h" +#include "sysemu/arch_init.h" +#include "hw/i2c/smbus.h" +#include "hw/boards.h" +#include "hw/timer/mc146818rtc.h" +#include "hw/xen/xen.h" +#include "sysemu/kvm.h" +#include "hw/kvm/clock.h" +#include "hw/pci-host/q35.h" +#include "exec/address-spaces.h" +#include "hw/i386/ich9.h" +#include "hw/i386/smbios.h" +#include "hw/ide/pci.h" +#include "hw/ide/ahci.h" +#include "hw/usb.h" +#include "hw/cpu/icc_bus.h" +#include "qemu/error-report.h" +#include "migration/migration.h" + +/* ICH9 AHCI has 6 ports */ +#define MAX_SATA_PORTS 6 + +static bool has_acpi_build = true; +static bool rsdp_in_ram = true; +static bool smbios_defaults = true; +static bool smbios_legacy_mode; +static bool smbios_uuid_encoded = true; +/* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to + * host addresses aligned at 1Gbyte boundaries. This way we can use 1GByte + * pages in the host. + */ +static bool gigabyte_align = true; +static bool has_reserved_memory = true; + +/* PC hardware initialisation */ +static void pc_q35_init(MachineState *machine) +{ + PCMachineState *pc_machine = PC_MACHINE(machine); + ram_addr_t below_4g_mem_size, above_4g_mem_size; + Q35PCIHost *q35_host; + PCIHostState *phb; + PCIBus *host_bus; + PCIDevice *lpc; + BusState *idebus[MAX_SATA_PORTS]; + ISADevice *rtc_state; + MemoryRegion *pci_memory; + MemoryRegion *rom_memory; + MemoryRegion *ram_memory; + GSIState *gsi_state; + ISABus *isa_bus; + int pci_enabled = 1; + qemu_irq *gsi; + qemu_irq *i8259; + int i; + ICH9LPCState *ich9_lpc; + PCIDevice *ahci; + DeviceState *icc_bridge; + PcGuestInfo *guest_info; + ram_addr_t lowmem; + DriveInfo *hd[MAX_SATA_PORTS]; + MachineClass *mc = MACHINE_GET_CLASS(machine); + + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory + * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping + * also known as MMCFG). + * If it doesn't, we need to split it in chunks below and above 4G. + * In any case, try to make sure that guest addresses aligned at + * 1G boundaries get mapped to host addresses aligned at 1G boundaries. + * For old machine types, use whatever split we used historically to avoid + * breaking migration. + */ + if (machine->ram_size >= 0xb0000000) { + lowmem = gigabyte_align ? 0x80000000 : 0xb0000000; + } else { + lowmem = 0xb0000000; + } + + /* Handle the machine opt max-ram-below-4g. It is basically doing + * min(qemu limit, user limit). + */ + if (lowmem > pc_machine->max_ram_below_4g) { + lowmem = pc_machine->max_ram_below_4g; + if (machine->ram_size - lowmem > lowmem && + lowmem & ((1ULL << 30) - 1)) { + error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 + ") not a multiple of 1G; possible bad performance.", + pc_machine->max_ram_below_4g); + } + } + + if (machine->ram_size >= lowmem) { + above_4g_mem_size = machine->ram_size - lowmem; + below_4g_mem_size = lowmem; + } else { + above_4g_mem_size = 0; + below_4g_mem_size = machine->ram_size; + } + + if (xen_enabled() && xen_hvm_init(&below_4g_mem_size, &above_4g_mem_size, + &ram_memory) != 0) { + fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); + exit(1); + } + + icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); + object_property_add_child(qdev_get_machine(), "icc-bridge", + OBJECT(icc_bridge), NULL); + + pc_cpus_init(machine->cpu_model, icc_bridge); + pc_acpi_init("q35-acpi-dsdt.aml"); + + kvmclock_create(); + + /* pci enabled */ + if (pci_enabled) { + pci_memory = g_new(MemoryRegion, 1); + memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); + rom_memory = pci_memory; + } else { + pci_memory = NULL; + rom_memory = get_system_memory(); + } + + guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size); + guest_info->isapc_ram_fw = false; + guest_info->has_acpi_build = has_acpi_build; + guest_info->has_reserved_memory = has_reserved_memory; + guest_info->rsdp_in_ram = rsdp_in_ram; + + /* Migration was not supported in 2.0 for Q35, so do not bother + * with this hack (see hw/i386/acpi-build.c). + */ + guest_info->legacy_acpi_table_size = 0; + + if (smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, smbios_legacy_mode, smbios_uuid_encoded); + } + + /* allocate ram and load rom/bios */ + if (!xen_enabled()) { + pc_memory_init(machine, get_system_memory(), + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); + } + + /* irq lines */ + gsi_state = g_malloc0(sizeof(*gsi_state)); + if (kvm_irqchip_in_kernel()) { + kvm_pc_setup_irq_routing(pci_enabled); + gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + GSI_NUM_PINS); + } else { + gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + } + + /* create pci host bus */ + q35_host = Q35_HOST_DEVICE(qdev_create(NULL, TYPE_Q35_HOST_DEVICE)); + + object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host), NULL); + q35_host->mch.ram_memory = ram_memory; + q35_host->mch.pci_address_space = pci_memory; + q35_host->mch.system_memory = get_system_memory(); + q35_host->mch.address_space_io = get_system_io(); + q35_host->mch.below_4g_mem_size = below_4g_mem_size; + q35_host->mch.above_4g_mem_size = above_4g_mem_size; + q35_host->mch.guest_info = guest_info; + /* pci */ + qdev_init_nofail(DEVICE(q35_host)); + phb = PCI_HOST_BRIDGE(q35_host); + host_bus = phb->bus; + /* create ISA bus */ + lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_LPC_DEV, + ICH9_LPC_FUNC), true, + TYPE_ICH9_LPC_DEVICE); + + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, + TYPE_HOTPLUG_HANDLER, + (Object **)&pc_machine->acpi_dev, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + object_property_set_link(OBJECT(machine), OBJECT(lpc), + PC_MACHINE_ACPI_DEVICE_PROP, &error_abort); + + ich9_lpc = ICH9_LPC_DEVICE(lpc); + ich9_lpc->pic = gsi; + ich9_lpc->ioapic = gsi_state->ioapic_irq; + pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, + ICH9_LPC_NB_PIRQS); + pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq); + isa_bus = ich9_lpc->isa_bus; + + /*end early*/ + isa_bus_irqs(isa_bus, gsi); + + if (kvm_irqchip_in_kernel()) { + i8259 = kvm_i8259_init(isa_bus); + } else if (xen_enabled()) { + i8259 = xen_interrupt_controller_init(); + } else { + i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq()); + } + + for (i = 0; i < ISA_NUM_IRQS; i++) { + gsi_state->i8259_irq[i] = i8259[i]; + } + if (pci_enabled) { + ioapic_init_gsi(gsi_state, "q35"); + } + qdev_init_nofail(icc_bridge); + + pc_register_ferr_irq(gsi[13]); + + assert(pc_machine->vmport != ON_OFF_AUTO_MAX); + if (pc_machine->vmport == ON_OFF_AUTO_AUTO) { + pc_machine->vmport = xen_enabled() ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; + } + + /* init basic PC hardware */ + pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, + (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104); + + /* connect pm stuff to lpc */ + ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine), !mc->no_tco); + + /* ahci and SATA device, for q35 1 ahci controller is built-in */ + ahci = pci_create_simple_multifunction(host_bus, + PCI_DEVFN(ICH9_SATA1_DEV, + ICH9_SATA1_FUNC), + true, "ich9-ahci"); + idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); + idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); + g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); + ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); + ahci_ide_create_devs(ahci, hd); + + if (usb_enabled()) { + /* Should we create 6 UHCI according to ich9 spec? */ + ehci_create_ich9_with_companions(host_bus, 0x1d); + } + + /* TODO: Populate SPD eeprom data. */ + smbus_eeprom_init(ich9_smb_init(host_bus, + PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), + 0xb100), + 8, NULL, 0); + + pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order, + machine, idebus[0], idebus[1], rtc_state); + + /* the rest devices to which pci devfn is automatically assigned */ + pc_vga_init(isa_bus, host_bus); + pc_nic_init(isa_bus, host_bus); + if (pci_enabled) { + pc_pci_device_init(host_bus); + } +} + +static void pc_compat_2_3(MachineState *machine) +{ + PCMachineState *pcms = PC_MACHINE(machine); + savevm_skip_section_footers(); + if (kvm_enabled()) { + pcms->smm = ON_OFF_AUTO_OFF; + } + global_state_set_optional(); + savevm_skip_configuration(); +} + +static void pc_compat_2_2(MachineState *machine) +{ + pc_compat_2_3(machine); + rsdp_in_ram = false; + x86_cpu_compat_set_features("kvm64", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("kvm32", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Conroe", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Penryn", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Nehalem", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Westmere", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("SandyBridge", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Broadwell", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G1", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G2", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G3", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G4", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Opteron_G5", FEAT_1_EDX, 0, CPUID_VME); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Haswell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_F16C); + x86_cpu_compat_set_features("Broadwell", FEAT_1_ECX, 0, CPUID_EXT_RDRAND); + machine->suppress_vmdesc = true; +} + +static void pc_compat_2_1(MachineState *machine) +{ + PCMachineState *pcms = PC_MACHINE(machine); + + pc_compat_2_2(machine); + pcms->enforce_aligned_dimm = false; + smbios_uuid_encoded = false; + x86_cpu_compat_set_features("coreduo", FEAT_1_ECX, CPUID_EXT_VMX, 0); + x86_cpu_compat_set_features("core2duo", FEAT_1_ECX, CPUID_EXT_VMX, 0); + x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM); +} + +static void pc_compat_2_0(MachineState *machine) +{ + pc_compat_2_1(machine); + smbios_legacy_mode = true; + has_reserved_memory = false; + pc_set_legacy_acpi_data_size(); +} + +static void pc_compat_1_7(MachineState *machine) +{ + pc_compat_2_0(machine); + smbios_defaults = false; + gigabyte_align = false; + option_rom_has_mr = true; + x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC); +} + +static void pc_compat_1_6(MachineState *machine) +{ + pc_compat_1_7(machine); + rom_file_has_mr = false; + has_acpi_build = false; +} + +static void pc_compat_1_5(MachineState *machine) +{ + pc_compat_1_6(machine); +} + +static void pc_compat_1_4(MachineState *machine) +{ + pc_compat_1_5(machine); + x86_cpu_compat_set_features("n270", FEAT_1_ECX, 0, CPUID_EXT_MOVBE); + x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ); +} + +#define DEFINE_Q35_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ + void (*compat)(MachineState *m) = (compatfn); \ + if (compat) { \ + compat(machine); \ + } \ + pc_q35_init(machine); \ + } \ + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + +static void pc_q35_machine_options(MachineClass *m) +{ + pc_default_machine_options(m); + m->family = "pc_q35"; + m->desc = "Standard PC (Q35 + ICH9, 2009)"; + m->hot_add_cpu = pc_hot_add_cpu; + m->units_per_default_bus = 1; +} + +static void pc_q35_2_4_machine_options(MachineClass *m) +{ + pc_q35_machine_options(m); + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_tco = 0; + m->alias = "q35"; +} + +DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); + + +static void pc_q35_2_3_machine_options(MachineClass *m) +{ + pc_q35_2_4_machine_options(m); + m->no_floppy = 0; + m->no_tco = 1; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_3); +} + +DEFINE_Q35_MACHINE(v2_3, "pc-q35-2.3", pc_compat_2_3, + pc_q35_2_3_machine_options); + + +static void pc_q35_2_2_machine_options(MachineClass *m) +{ + pc_q35_2_3_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_2_2); +} + +DEFINE_Q35_MACHINE(v2_2, "pc-q35-2.2", pc_compat_2_2, + pc_q35_2_2_machine_options); + + +static void pc_q35_2_1_machine_options(MachineClass *m) +{ + pc_q35_2_2_machine_options(m); + m->default_display = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_1); +} + +DEFINE_Q35_MACHINE(v2_1, "pc-q35-2.1", pc_compat_2_1, + pc_q35_2_1_machine_options); + + +static void pc_q35_2_0_machine_options(MachineClass *m) +{ + pc_q35_2_1_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_2_0); +} + +DEFINE_Q35_MACHINE(v2_0, "pc-q35-2.0", pc_compat_2_0, + pc_q35_2_0_machine_options); + + +static void pc_q35_1_7_machine_options(MachineClass *m) +{ + pc_q35_2_0_machine_options(m); + m->default_machine_opts = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_1_7); +} + +DEFINE_Q35_MACHINE(v1_7, "pc-q35-1.7", pc_compat_1_7, + pc_q35_1_7_machine_options); + + +static void pc_q35_1_6_machine_options(MachineClass *m) +{ + pc_q35_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_6); +} + +DEFINE_Q35_MACHINE(v1_6, "pc-q35-1.6", pc_compat_1_6, + pc_q35_1_6_machine_options); + + +static void pc_q35_1_5_machine_options(MachineClass *m) +{ + pc_q35_1_6_machine_options(m); + SET_MACHINE_COMPAT(m, PC_COMPAT_1_5); +} + +DEFINE_Q35_MACHINE(v1_5, "pc-q35-1.5", pc_compat_1_5, + pc_q35_1_5_machine_options); + + +static void pc_q35_1_4_machine_options(MachineClass *m) +{ + pc_q35_1_5_machine_options(m); + m->hot_add_cpu = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_1_4); +} + +DEFINE_Q35_MACHINE(v1_4, "pc-q35-1.4", pc_compat_1_4, + pc_q35_1_4_machine_options); diff --git a/qemu/hw/i386/pc_sysfw.c b/qemu/hw/i386/pc_sysfw.c new file mode 100644 index 000000000..662d99768 --- /dev/null +++ b/qemu/hw/i386/pc_sysfw.c @@ -0,0 +1,251 @@ +/* + * QEMU PC System Firmware + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2011-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sysemu/block-backend.h" +#include "qemu/error-report.h" +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "hw/boards.h" +#include "hw/loader.h" +#include "sysemu/sysemu.h" +#include "hw/block/flash.h" +#include "sysemu/kvm.h" + +#define BIOS_FILENAME "bios.bin" + +typedef struct PcSysFwDevice { + SysBusDevice busdev; + uint8_t isapc_ram_fw; +} PcSysFwDevice; + +static void pc_isa_bios_init(MemoryRegion *rom_memory, + MemoryRegion *flash_mem, + int ram_size) +{ + int isa_bios_size; + MemoryRegion *isa_bios; + uint64_t flash_size; + void *flash_ptr, *isa_bios_ptr; + + flash_size = memory_region_size(flash_mem); + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * 1024); + isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_abort); + vmstate_register_ram_global(isa_bios); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, + 1); + + /* copy ISA rom image from top of flash memory */ + flash_ptr = memory_region_get_ram_ptr(flash_mem); + isa_bios_ptr = memory_region_get_ram_ptr(isa_bios); + memcpy(isa_bios_ptr, + ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), + isa_bios_size); + + memory_region_set_readonly(isa_bios, true); +} + +#define FLASH_MAP_UNIT_MAX 2 + +/* We don't have a theoretically justifiable exact lower bound on the base + * address of any flash mapping. In practice, the IO-APIC MMIO range is + * [0xFEE00000..0xFEE01000[ -- see IO_APIC_DEFAULT_ADDRESS --, leaving free + * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in + * size. + */ +#define FLASH_MAP_BASE_MIN ((hwaddr)(0x100000000ULL - 8*1024*1024)) + +/* This function maps flash drives from 4G downward, in order of their unit + * numbers. The mapping starts at unit#0, with unit number increments of 1, and + * stops before the first missing flash drive, or before + * unit#FLASH_MAP_UNIT_MAX, whichever is reached first. + * + * Addressing within one flash drive is of course not reversed. + * + * An error message is printed and the process exits if: + * - the size of the backing file for a flash drive is non-positive, or not a + * multiple of the required sector size, or + * - the current mapping's base address would fall below FLASH_MAP_BASE_MIN. + * + * The drive with unit#0 (if available) is mapped at the highest address, and + * it is passed to pc_isa_bios_init(). Merging several drives for isa-bios is + * not supported. + */ +static void pc_system_flash_init(MemoryRegion *rom_memory) +{ + int unit; + DriveInfo *pflash_drv; + BlockBackend *blk; + int64_t size; + char *fatal_errmsg = NULL; + hwaddr phys_addr = 0x100000000ULL; + int sector_bits, sector_size; + pflash_t *system_flash; + MemoryRegion *flash_mem; + char name[64]; + + sector_bits = 12; + sector_size = 1 << sector_bits; + + for (unit = 0; + (unit < FLASH_MAP_UNIT_MAX && + (pflash_drv = drive_get(IF_PFLASH, 0, unit)) != NULL); + ++unit) { + blk = blk_by_legacy_dinfo(pflash_drv); + size = blk_getlength(blk); + if (size < 0) { + fatal_errmsg = g_strdup_printf("failed to get backing file size"); + } else if (size == 0) { + fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " + "cannot have zero size"); + } else if ((size % sector_size) != 0) { + fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " + "must be a multiple of 0x%x", sector_size); + } else if (phys_addr < size || phys_addr - size < FLASH_MAP_BASE_MIN) { + fatal_errmsg = g_strdup_printf("oversized backing file, pflash " + "segments cannot be mapped under " + TARGET_FMT_plx, FLASH_MAP_BASE_MIN); + } + if (fatal_errmsg != NULL) { + Location loc; + + /* push a new, "none" location on the location stack; overwrite its + * contents with the location saved in the option; print the error + * (includes location); pop the top + */ + loc_push_none(&loc); + if (pflash_drv->opts != NULL) { + qemu_opts_loc_restore(pflash_drv->opts); + } + error_report("%s", fatal_errmsg); + loc_pop(&loc); + g_free(fatal_errmsg); + exit(1); + } + + phys_addr -= size; + + /* pflash_cfi01_register() creates a deep copy of the name */ + snprintf(name, sizeof name, "system.flash%d", unit); + system_flash = pflash_cfi01_register(phys_addr, NULL /* qdev */, name, + size, blk, sector_size, + size >> sector_bits, + 1 /* width */, + 0x0000 /* id0 */, + 0x0000 /* id1 */, + 0x0000 /* id2 */, + 0x0000 /* id3 */, + 0 /* be */); + if (unit == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); + pc_isa_bios_init(rom_memory, flash_mem, size); + } + } +} + +static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) +{ + char *filename; + MemoryRegion *bios, *isa_bios; + int bios_size, isa_bios_size; + int ret; + + /* BIOS load */ + if (bios_name == NULL) { + bios_name = BIOS_FILENAME; + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); + } else { + bios_size = -1; + } + if (bios_size <= 0 || + (bios_size % 65536) != 0) { + goto bios_error; + } + bios = g_malloc(sizeof(*bios)); + memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_abort); + vmstate_register_ram_global(bios); + if (!isapc_ram_fw) { + memory_region_set_readonly(bios, true); + } + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + bios_error: + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); + exit(1); + } + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = bios_size; + if (isa_bios_size > (128 * 1024)) { + isa_bios_size = 128 * 1024; + } + isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, + 1); + if (!isapc_ram_fw) { + memory_region_set_readonly(isa_bios, true); + } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), + bios); +} + +void pc_system_firmware_init(MemoryRegion *rom_memory, bool isapc_ram_fw) +{ + DriveInfo *pflash_drv; + + pflash_drv = drive_get(IF_PFLASH, 0, 0); + + if (isapc_ram_fw || pflash_drv == NULL) { + /* When a pflash drive is not found, use rom-mode */ + old_pc_system_rom_init(rom_memory, isapc_ram_fw); + return; + } + + if (kvm_enabled() && !kvm_readonly_mem_enabled()) { + /* Older KVM cannot execute from device memory. So, flash memory + * cannot be used unless the readonly memory kvm capability is present. */ + fprintf(stderr, "qemu: pflash with kvm requires KVM readonly memory support\n"); + exit(1); + } + + pc_system_flash_init(rom_memory); +} diff --git a/qemu/hw/i386/q35-acpi-dsdt.dsl b/qemu/hw/i386/q35-acpi-dsdt.dsl new file mode 100644 index 000000000..16eaca3fa --- /dev/null +++ b/qemu/hw/i386/q35-acpi-dsdt.dsl @@ -0,0 +1,435 @@ +/* + * Bochs/QEMU ACPI DSDT ASL definition + * + * Copyright (c) 2006 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +/* + * Copyright (c) 2010 Isaku Yamahata + * yamahata at valinux co jp + * Based on acpi-dsdt.dsl, but heavily modified for q35 chipset. + */ + +ACPI_EXTRACT_ALL_CODE Q35AcpiDsdtAmlCode + +DefinitionBlock ( + "q35-acpi-dsdt.aml",// Output Filename + "DSDT", // Signature + 0x01, // DSDT Compliance Revision + "BXPC", // OEMID + "BXDSDT", // TABLE ID + 0x2 // OEM Revision + ) +{ + +#include "acpi-dsdt-dbug.dsl" + + Scope(\_SB) { + OperationRegion(PCST, SystemIO, 0xae00, 0x0c) + OperationRegion(PCSB, SystemIO, 0xae0c, 0x01) + Field(PCSB, AnyAcc, NoLock, WriteAsZeros) { + PCIB, 8, + } + } + + +/**************************************************************** + * PCI Bus definition + ****************************************************************/ + Scope(\_SB) { + Device(PCI0) { + Name(_HID, EisaId("PNP0A08")) + Name(_CID, EisaId("PNP0A03")) + Name(_ADR, 0x00) + Name(_UID, 1) + + External(ISA, DeviceObj) + + // _OSC: based on sample of ACPI3.0b spec + Name(SUPP, 0) // PCI _OSC Support Field value + Name(CTRL, 0) // PCI _OSC Control Field value + Method(_OSC, 4) { + // Create DWORD-addressable fields from the Capabilities Buffer + CreateDWordField(Arg3, 0, CDW1) + + // Check for proper UUID + If (LEqual(Arg0, ToUUID("33DB4D5B-1FF7-401C-9657-7441C03DD766"))) { + // Create DWORD-addressable fields from the Capabilities Buffer + CreateDWordField(Arg3, 4, CDW2) + CreateDWordField(Arg3, 8, CDW3) + + // Save Capabilities DWORD2 & 3 + Store(CDW2, SUPP) + Store(CDW3, CTRL) + + // Always allow native PME, AER (no dependencies) + // Never allow SHPC (no SHPC controller in this system) + And(CTRL, 0x1D, CTRL) + +#if 0 // For now, nothing to do + If (Not(And(CDW1, 1))) { // Query flag clear? + // Disable GPEs for features granted native control. + If (And(CTRL, 0x01)) { // Hot plug control granted? + Store(0, HPCE) // clear the hot plug SCI enable bit + Store(1, HPCS) // clear the hot plug SCI status bit + } + If (And(CTRL, 0x04)) { // PME control granted? + Store(0, PMCE) // clear the PME SCI enable bit + Store(1, PMCS) // clear the PME SCI status bit + } + If (And(CTRL, 0x10)) { // OS restoring PCI Express cap structure? + // Set status to not restore PCI Express cap structure + // upon resume from S3 + Store(1, S3CR) + } + } +#endif + If (LNotEqual(Arg1, One)) { + // Unknown revision + Or(CDW1, 0x08, CDW1) + } + If (LNotEqual(CDW3, CTRL)) { + // Capabilities bits were masked + Or(CDW1, 0x10, CDW1) + } + // Update DWORD3 in the buffer + Store(CTRL, CDW3) + } Else { + Or(CDW1, 4, CDW1) // Unrecognized UUID + } + Return (Arg3) + } + } + } + +#include "acpi-dsdt-hpet.dsl" + + +/**************************************************************** + * LPC ISA bridge + ****************************************************************/ + + Scope(\_SB.PCI0) { + /* PCI D31:f0 LPC ISA bridge */ + Device(ISA) { + Name (_ADR, 0x001F0000) // _ADR: Address + + /* ICH9 PCI to ISA irq remapping */ + OperationRegion(PIRQ, PCI_Config, 0x60, 0x0C) + + OperationRegion(LPCD, PCI_Config, 0x80, 0x2) + Field(LPCD, AnyAcc, NoLock, Preserve) { + COMA, 3, + , 1, + COMB, 3, + + Offset(0x01), + LPTD, 2, + , 2, + FDCD, 2 + } + OperationRegion(LPCE, PCI_Config, 0x82, 0x2) + Field(LPCE, AnyAcc, NoLock, Preserve) { + CAEN, 1, + CBEN, 1, + LPEN, 1, + FDEN, 1 + } + } + } + +#include "acpi-dsdt-isa.dsl" + + +/**************************************************************** + * PCI IRQs + ****************************************************************/ + + /* Zero => PIC mode, One => APIC Mode */ + Name(\PICF, Zero) + Method(\_PIC, 1, NotSerialized) { + Store(Arg0, \PICF) + } + + Scope(\_SB) { + Scope(PCI0) { +#define prt_slot_lnk(nr, lnk0, lnk1, lnk2, lnk3) \ + Package() { nr##ffff, 0, lnk0, 0 }, \ + Package() { nr##ffff, 1, lnk1, 0 }, \ + Package() { nr##ffff, 2, lnk2, 0 }, \ + Package() { nr##ffff, 3, lnk3, 0 } + +#define prt_slot_lnkA(nr) prt_slot_lnk(nr, LNKA, LNKB, LNKC, LNKD) +#define prt_slot_lnkB(nr) prt_slot_lnk(nr, LNKB, LNKC, LNKD, LNKA) +#define prt_slot_lnkC(nr) prt_slot_lnk(nr, LNKC, LNKD, LNKA, LNKB) +#define prt_slot_lnkD(nr) prt_slot_lnk(nr, LNKD, LNKA, LNKB, LNKC) + +#define prt_slot_lnkE(nr) prt_slot_lnk(nr, LNKE, LNKF, LNKG, LNKH) +#define prt_slot_lnkF(nr) prt_slot_lnk(nr, LNKF, LNKG, LNKH, LNKE) +#define prt_slot_lnkG(nr) prt_slot_lnk(nr, LNKG, LNKH, LNKE, LNKF) +#define prt_slot_lnkH(nr) prt_slot_lnk(nr, LNKH, LNKE, LNKF, LNKG) + + Name(PRTP, package() { + prt_slot_lnkE(0x0000), + prt_slot_lnkF(0x0001), + prt_slot_lnkG(0x0002), + prt_slot_lnkH(0x0003), + prt_slot_lnkE(0x0004), + prt_slot_lnkF(0x0005), + prt_slot_lnkG(0x0006), + prt_slot_lnkH(0x0007), + prt_slot_lnkE(0x0008), + prt_slot_lnkF(0x0009), + prt_slot_lnkG(0x000a), + prt_slot_lnkH(0x000b), + prt_slot_lnkE(0x000c), + prt_slot_lnkF(0x000d), + prt_slot_lnkG(0x000e), + prt_slot_lnkH(0x000f), + prt_slot_lnkE(0x0010), + prt_slot_lnkF(0x0011), + prt_slot_lnkG(0x0012), + prt_slot_lnkH(0x0013), + prt_slot_lnkE(0x0014), + prt_slot_lnkF(0x0015), + prt_slot_lnkG(0x0016), + prt_slot_lnkH(0x0017), + prt_slot_lnkE(0x0018), + + /* INTA -> PIRQA for slot 25 - 31 + see the default value of D<N>IR */ + prt_slot_lnkA(0x0019), + prt_slot_lnkA(0x001a), + prt_slot_lnkA(0x001b), + prt_slot_lnkA(0x001c), + prt_slot_lnkA(0x001d), + + /* PCIe->PCI bridge. use PIRQ[E-H] */ + prt_slot_lnkE(0x001e), + + prt_slot_lnkA(0x001f) + }) + +#define prt_slot_gsi(nr, gsi0, gsi1, gsi2, gsi3) \ + Package() { nr##ffff, 0, gsi0, 0 }, \ + Package() { nr##ffff, 1, gsi1, 0 }, \ + Package() { nr##ffff, 2, gsi2, 0 }, \ + Package() { nr##ffff, 3, gsi3, 0 } + +#define prt_slot_gsiA(nr) prt_slot_gsi(nr, GSIA, GSIB, GSIC, GSID) +#define prt_slot_gsiB(nr) prt_slot_gsi(nr, GSIB, GSIC, GSID, GSIA) +#define prt_slot_gsiC(nr) prt_slot_gsi(nr, GSIC, GSID, GSIA, GSIB) +#define prt_slot_gsiD(nr) prt_slot_gsi(nr, GSID, GSIA, GSIB, GSIC) + +#define prt_slot_gsiE(nr) prt_slot_gsi(nr, GSIE, GSIF, GSIG, GSIH) +#define prt_slot_gsiF(nr) prt_slot_gsi(nr, GSIF, GSIG, GSIH, GSIE) +#define prt_slot_gsiG(nr) prt_slot_gsi(nr, GSIG, GSIH, GSIE, GSIF) +#define prt_slot_gsiH(nr) prt_slot_gsi(nr, GSIH, GSIE, GSIF, GSIG) + + Name(PRTA, package() { + prt_slot_gsiE(0x0000), + prt_slot_gsiF(0x0001), + prt_slot_gsiG(0x0002), + prt_slot_gsiH(0x0003), + prt_slot_gsiE(0x0004), + prt_slot_gsiF(0x0005), + prt_slot_gsiG(0x0006), + prt_slot_gsiH(0x0007), + prt_slot_gsiE(0x0008), + prt_slot_gsiF(0x0009), + prt_slot_gsiG(0x000a), + prt_slot_gsiH(0x000b), + prt_slot_gsiE(0x000c), + prt_slot_gsiF(0x000d), + prt_slot_gsiG(0x000e), + prt_slot_gsiH(0x000f), + prt_slot_gsiE(0x0010), + prt_slot_gsiF(0x0011), + prt_slot_gsiG(0x0012), + prt_slot_gsiH(0x0013), + prt_slot_gsiE(0x0014), + prt_slot_gsiF(0x0015), + prt_slot_gsiG(0x0016), + prt_slot_gsiH(0x0017), + prt_slot_gsiE(0x0018), + + /* INTA -> PIRQA for slot 25 - 31, but 30 + see the default value of D<N>IR */ + prt_slot_gsiA(0x0019), + prt_slot_gsiA(0x001a), + prt_slot_gsiA(0x001b), + prt_slot_gsiA(0x001c), + prt_slot_gsiA(0x001d), + + /* PCIe->PCI bridge. use PIRQ[E-H] */ + prt_slot_gsiE(0x001e), + + prt_slot_gsiA(0x001f) + }) + + Method(_PRT, 0, NotSerialized) { + /* PCI IRQ routing table, example from ACPI 2.0a specification, + section 6.2.8.1 */ + /* Note: we provide the same info as the PCI routing + table of the Bochs BIOS */ + If (LEqual(\PICF, Zero)) { + Return (PRTP) + } Else { + Return (PRTA) + } + } + } + + Field(PCI0.ISA.PIRQ, ByteAcc, NoLock, Preserve) { + PRQA, 8, + PRQB, 8, + PRQC, 8, + PRQD, 8, + + Offset(0x08), + PRQE, 8, + PRQF, 8, + PRQG, 8, + PRQH, 8 + } + + Method(IQST, 1, NotSerialized) { + // _STA method - get status + If (And(0x80, Arg0)) { + Return (0x09) + } + Return (0x0B) + } + Method(IQCR, 1, Serialized) { + // _CRS method - get current settings + Name(PRR0, ResourceTemplate() { + Interrupt(, Level, ActiveHigh, Shared) { 0 } + }) + CreateDWordField(PRR0, 0x05, PRRI) + Store(And(Arg0, 0x0F), PRRI) + Return (PRR0) + } + +#define define_link(link, uid, reg) \ + Device(link) { \ + Name(_HID, EISAID("PNP0C0F")) \ + Name(_UID, uid) \ + Name(_PRS, ResourceTemplate() { \ + Interrupt(, Level, ActiveHigh, Shared) { \ + 5, 10, 11 \ + } \ + }) \ + Method(_STA, 0, NotSerialized) { \ + Return (IQST(reg)) \ + } \ + Method(_DIS, 0, NotSerialized) { \ + Or(reg, 0x80, reg) \ + } \ + Method(_CRS, 0, NotSerialized) { \ + Return (IQCR(reg)) \ + } \ + Method(_SRS, 1, NotSerialized) { \ + CreateDWordField(Arg0, 0x05, PRRI) \ + Store(PRRI, reg) \ + } \ + } + + define_link(LNKA, 0, PRQA) + define_link(LNKB, 1, PRQB) + define_link(LNKC, 2, PRQC) + define_link(LNKD, 3, PRQD) + define_link(LNKE, 4, PRQE) + define_link(LNKF, 5, PRQF) + define_link(LNKG, 6, PRQG) + define_link(LNKH, 7, PRQH) + +#define define_gsi_link(link, uid, gsi) \ + Device(link) { \ + Name(_HID, EISAID("PNP0C0F")) \ + Name(_UID, uid) \ + Name(_PRS, ResourceTemplate() { \ + Interrupt(, Level, ActiveHigh, Shared) { \ + gsi \ + } \ + }) \ + Name(_CRS, ResourceTemplate() { \ + Interrupt(, Level, ActiveHigh, Shared) { \ + gsi \ + } \ + }) \ + Method(_SRS, 1, NotSerialized) { \ + } \ + } + + define_gsi_link(GSIA, 0, 0x10) + define_gsi_link(GSIB, 0, 0x11) + define_gsi_link(GSIC, 0, 0x12) + define_gsi_link(GSID, 0, 0x13) + define_gsi_link(GSIE, 0, 0x14) + define_gsi_link(GSIF, 0, 0x15) + define_gsi_link(GSIG, 0, 0x16) + define_gsi_link(GSIH, 0, 0x17) + } + +#include "hw/acpi/pc-hotplug.h" +#define CPU_STATUS_BASE ICH9_CPU_HOTPLUG_IO_BASE +#include "acpi-dsdt-cpu-hotplug.dsl" +#include "acpi-dsdt-mem-hotplug.dsl" + + +/**************************************************************** + * General purpose events + ****************************************************************/ + Scope(\_GPE) { + Name(_HID, "ACPI0006") + + Method(_L00) { + } + Method(_L01) { + } + Method(_E02) { + // CPU hotplug event + \_SB.PRSC() + } + Method(_E03) { + // Memory hotplug event + \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD() + } + Method(_L04) { + } + Method(_L05) { + } + Method(_L06) { + } + Method(_L07) { + } + Method(_L08) { + } + Method(_L09) { + } + Method(_L0A) { + } + Method(_L0B) { + } + Method(_L0C) { + } + Method(_L0D) { + } + Method(_L0E) { + } + Method(_L0F) { + } + } +} diff --git a/qemu/hw/i386/q35-acpi-dsdt.hex.generated b/qemu/hw/i386/q35-acpi-dsdt.hex.generated new file mode 100644 index 000000000..ed9a2cc8e --- /dev/null +++ b/qemu/hw/i386/q35-acpi-dsdt.hex.generated @@ -0,0 +1,7610 @@ +static unsigned char Q35AcpiDsdtAmlCode[] = { +0x44, +0x53, +0x44, +0x54, +0xb8, +0x1d, +0x0, +0x0, +0x1, +0x35, +0x42, +0x58, +0x50, +0x43, +0x0, +0x0, +0x42, +0x58, +0x44, +0x53, +0x44, +0x54, +0x0, +0x0, +0x2, +0x0, +0x0, +0x0, +0x49, +0x4e, +0x54, +0x4c, +0x7, +0x11, +0x14, +0x20, +0x10, +0x49, +0x4, +0x5c, +0x0, +0x5b, +0x80, +0x44, +0x42, +0x47, +0x5f, +0x1, +0xb, +0x2, +0x4, +0x1, +0x5b, +0x81, +0xb, +0x44, +0x42, +0x47, +0x5f, +0x1, +0x44, +0x42, +0x47, +0x42, +0x8, +0x14, +0x2c, +0x44, +0x42, +0x55, +0x47, +0x1, +0x98, +0x68, +0x60, +0x96, +0x60, +0x60, +0x74, +0x87, +0x60, +0x1, +0x61, +0x70, +0x0, +0x62, +0xa2, +0x10, +0x95, +0x62, +0x61, +0x70, +0x83, +0x88, +0x60, +0x62, +0x0, +0x44, +0x42, +0x47, +0x42, +0x75, +0x62, +0x70, +0xa, +0xa, +0x44, +0x42, +0x47, +0x42, +0x10, +0x29, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x80, +0x50, +0x43, +0x53, +0x54, +0x1, +0xb, +0x0, +0xae, +0xa, +0xc, +0x5b, +0x80, +0x50, +0x43, +0x53, +0x42, +0x1, +0xb, +0xc, +0xae, +0x1, +0x5b, +0x81, +0xb, +0x50, +0x43, +0x53, +0x42, +0x40, +0x50, +0x43, +0x49, +0x42, +0x8, +0x10, +0x4f, +0xc, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x47, +0xc, +0x50, +0x43, +0x49, +0x30, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xa, +0x8, +0x8, +0x5f, +0x43, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xa, +0x3, +0x8, +0x5f, +0x41, +0x44, +0x52, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x8, +0x53, +0x55, +0x50, +0x50, +0x0, +0x8, +0x43, +0x54, +0x52, +0x4c, +0x0, +0x14, +0x44, +0x9, +0x5f, +0x4f, +0x53, +0x43, +0x4, +0x8a, +0x6b, +0x0, +0x43, +0x44, +0x57, +0x31, +0xa0, +0x46, +0x7, +0x93, +0x68, +0x11, +0x13, +0xa, +0x10, +0x5b, +0x4d, +0xdb, +0x33, +0xf7, +0x1f, +0x1c, +0x40, +0x96, +0x57, +0x74, +0x41, +0xc0, +0x3d, +0xd7, +0x66, +0x8a, +0x6b, +0xa, +0x4, +0x43, +0x44, +0x57, +0x32, +0x8a, +0x6b, +0xa, +0x8, +0x43, +0x44, +0x57, +0x33, +0x70, +0x43, +0x44, +0x57, +0x32, +0x53, +0x55, +0x50, +0x50, +0x70, +0x43, +0x44, +0x57, +0x33, +0x43, +0x54, +0x52, +0x4c, +0x7b, +0x43, +0x54, +0x52, +0x4c, +0xa, +0x1d, +0x43, +0x54, +0x52, +0x4c, +0xa0, +0x10, +0x92, +0x93, +0x69, +0x1, +0x7d, +0x43, +0x44, +0x57, +0x31, +0xa, +0x8, +0x43, +0x44, +0x57, +0x31, +0xa0, +0x16, +0x92, +0x93, +0x43, +0x44, +0x57, +0x33, +0x43, +0x54, +0x52, +0x4c, +0x7d, +0x43, +0x44, +0x57, +0x31, +0xa, +0x10, +0x43, +0x44, +0x57, +0x31, +0x70, +0x43, +0x54, +0x52, +0x4c, +0x43, +0x44, +0x57, +0x33, +0xa1, +0xc, +0x7d, +0x43, +0x44, +0x57, +0x31, +0xa, +0x4, +0x43, +0x44, +0x57, +0x31, +0xa4, +0x6b, +0x10, +0x4d, +0x8, +0x5f, +0x53, +0x42, +0x5f, +0x5b, +0x82, +0x45, +0x8, +0x48, +0x50, +0x45, +0x54, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x1, +0x3, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x5b, +0x80, +0x48, +0x50, +0x54, +0x4d, +0x0, +0xc, +0x0, +0x0, +0xd0, +0xfe, +0xb, +0x0, +0x4, +0x5b, +0x81, +0x10, +0x48, +0x50, +0x54, +0x4d, +0x13, +0x56, +0x45, +0x4e, +0x44, +0x20, +0x50, +0x52, +0x44, +0x5f, +0x20, +0x14, +0x36, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x56, +0x45, +0x4e, +0x44, +0x60, +0x70, +0x50, +0x52, +0x44, +0x5f, +0x61, +0x7a, +0x60, +0xa, +0x10, +0x60, +0xa0, +0xc, +0x91, +0x93, +0x60, +0x0, +0x93, +0x60, +0xb, +0xff, +0xff, +0xa4, +0x0, +0xa0, +0xe, +0x91, +0x93, +0x61, +0x0, +0x94, +0x61, +0xc, +0x0, +0xe1, +0xf5, +0x5, +0xa4, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x11, +0xa, +0xe, +0x86, +0x9, +0x0, +0x0, +0x0, +0x0, +0xd0, +0xfe, +0x0, +0x4, +0x0, +0x0, +0x79, +0x0, +0x10, +0x4c, +0x7, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x4f, +0x6, +0x49, +0x53, +0x41, +0x5f, +0x8, +0x5f, +0x41, +0x44, +0x52, +0xc, +0x0, +0x0, +0x1f, +0x0, +0x5b, +0x80, +0x50, +0x49, +0x52, +0x51, +0x2, +0xa, +0x60, +0xa, +0xc, +0x5b, +0x80, +0x4c, +0x50, +0x43, +0x44, +0x2, +0xa, +0x80, +0xa, +0x2, +0x5b, +0x81, +0x20, +0x4c, +0x50, +0x43, +0x44, +0x0, +0x43, +0x4f, +0x4d, +0x41, +0x3, +0x0, +0x1, +0x43, +0x4f, +0x4d, +0x42, +0x3, +0x0, +0x1, +0x4c, +0x50, +0x54, +0x44, +0x2, +0x0, +0x2, +0x46, +0x44, +0x43, +0x44, +0x2, +0x5b, +0x80, +0x4c, +0x50, +0x43, +0x45, +0x2, +0xa, +0x82, +0xa, +0x2, +0x5b, +0x81, +0x1a, +0x4c, +0x50, +0x43, +0x45, +0x0, +0x43, +0x41, +0x45, +0x4e, +0x1, +0x43, +0x42, +0x45, +0x4e, +0x1, +0x4c, +0x50, +0x45, +0x4e, +0x1, +0x46, +0x44, +0x45, +0x4e, +0x1, +0x10, +0x4c, +0x1b, +0x2f, +0x3, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x49, +0x53, +0x41, +0x5f, +0x5b, +0x82, +0x2d, +0x52, +0x54, +0x43, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xb, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x18, +0xa, +0x15, +0x47, +0x1, +0x70, +0x0, +0x70, +0x0, +0x10, +0x2, +0x22, +0x0, +0x1, +0x47, +0x1, +0x72, +0x0, +0x72, +0x0, +0x2, +0x6, +0x79, +0x0, +0x5b, +0x82, +0x37, +0x4b, +0x42, +0x44, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x3, +0x3, +0x14, +0x9, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x18, +0xa, +0x15, +0x47, +0x1, +0x60, +0x0, +0x60, +0x0, +0x1, +0x1, +0x47, +0x1, +0x64, +0x0, +0x64, +0x0, +0x1, +0x1, +0x22, +0x2, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x27, +0x4d, +0x4f, +0x55, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xf, +0x13, +0x14, +0x9, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x8, +0xa, +0x5, +0x22, +0x0, +0x10, +0x79, +0x0, +0x5b, +0x82, +0x4a, +0x4, +0x46, +0x44, +0x43, +0x30, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x7, +0x0, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x46, +0x44, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x1b, +0xa, +0x18, +0x47, +0x1, +0xf2, +0x3, +0xf2, +0x3, +0x0, +0x4, +0x47, +0x1, +0xf7, +0x3, +0xf7, +0x3, +0x0, +0x1, +0x22, +0x40, +0x0, +0x2a, +0x4, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x3e, +0x4c, +0x50, +0x54, +0x5f, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x4, +0x0, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x4c, +0x50, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0x78, +0x3, +0x78, +0x3, +0x8, +0x8, +0x22, +0x80, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x45, +0x4, +0x43, +0x4f, +0x4d, +0x31, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x5, +0x1, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x43, +0x41, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0xf8, +0x3, +0xf8, +0x3, +0x0, +0x8, +0x22, +0x10, +0x0, +0x79, +0x0, +0x5b, +0x82, +0x46, +0x4, +0x43, +0x4f, +0x4d, +0x32, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0x5, +0x1, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x2, +0x14, +0x18, +0x5f, +0x53, +0x54, +0x41, +0x0, +0x70, +0x43, +0x42, +0x45, +0x4e, +0x60, +0xa0, +0x6, +0x93, +0x60, +0x0, +0xa4, +0x0, +0xa1, +0x4, +0xa4, +0xa, +0xf, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0x10, +0xa, +0xd, +0x47, +0x1, +0xf8, +0x2, +0xf8, +0x2, +0x0, +0x8, +0x22, +0x8, +0x0, +0x79, +0x0, +0x8, +0x50, +0x49, +0x43, +0x46, +0x0, +0x14, +0xc, +0x5f, +0x50, +0x49, +0x43, +0x1, +0x70, +0x68, +0x50, +0x49, +0x43, +0x46, +0x10, +0x8e, +0x55, +0x1, +0x5f, +0x53, +0x42, +0x5f, +0x10, +0x43, +0xea, +0x50, +0x43, +0x49, +0x30, +0x8, +0x50, +0x52, +0x54, +0x50, +0x12, +0x4b, +0x73, +0x80, +0x12, +0xb, +0x4, +0xb, +0xff, +0xff, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xb, +0x4, +0xb, +0xff, +0xff, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xc, +0x4, +0xb, +0xff, +0xff, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xc, +0x4, +0xb, +0xff, +0xff, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0x0, +0x4c, +0x4e, +0x4b, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0x1, +0x4c, +0x4e, +0x4b, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0xa, +0x2, +0x4c, +0x4e, +0x4b, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0xa, +0x3, +0x4c, +0x4e, +0x4b, +0x44, +0x0, +0x8, +0x50, +0x52, +0x54, +0x41, +0x12, +0x4b, +0x73, +0x80, +0x12, +0xb, +0x4, +0xb, +0xff, +0xff, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xb, +0x4, +0xb, +0xff, +0xff, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xc, +0x4, +0xb, +0xff, +0xff, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xc, +0x4, +0xb, +0xff, +0xff, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x2, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x3, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x4, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x5, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x6, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x7, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x8, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x9, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xa, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xb, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xc, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xd, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xe, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0xf, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x10, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x11, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x12, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x13, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x14, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0x0, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0x1, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x15, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0x0, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0x1, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x16, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0x0, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0x1, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x17, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x18, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x19, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1a, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1b, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1c, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1d, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0x0, +0x47, +0x53, +0x49, +0x45, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0x1, +0x47, +0x53, +0x49, +0x46, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x47, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1e, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x48, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0x0, +0x47, +0x53, +0x49, +0x41, +0x0, +0x12, +0xd, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0x1, +0x47, +0x53, +0x49, +0x42, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0xa, +0x2, +0x47, +0x53, +0x49, +0x43, +0x0, +0x12, +0xe, +0x4, +0xc, +0xff, +0xff, +0x1f, +0x0, +0xa, +0x3, +0x47, +0x53, +0x49, +0x44, +0x0, +0x14, +0x1a, +0x5f, +0x50, +0x52, +0x54, +0x0, +0xa0, +0xc, +0x93, +0x50, +0x49, +0x43, +0x46, +0x0, +0xa4, +0x50, +0x52, +0x54, +0x50, +0xa1, +0x6, +0xa4, +0x50, +0x52, +0x54, +0x41, +0x5b, +0x81, +0x3a, +0x2f, +0x3, +0x50, +0x43, +0x49, +0x30, +0x49, +0x53, +0x41, +0x5f, +0x50, +0x49, +0x52, +0x51, +0x1, +0x50, +0x52, +0x51, +0x41, +0x8, +0x50, +0x52, +0x51, +0x42, +0x8, +0x50, +0x52, +0x51, +0x43, +0x8, +0x50, +0x52, +0x51, +0x44, +0x8, +0x0, +0x20, +0x50, +0x52, +0x51, +0x45, +0x8, +0x50, +0x52, +0x51, +0x46, +0x8, +0x50, +0x52, +0x51, +0x47, +0x8, +0x50, +0x52, +0x51, +0x48, +0x8, +0x14, +0x13, +0x49, +0x51, +0x53, +0x54, +0x1, +0xa0, +0x9, +0x7b, +0xa, +0x80, +0x68, +0x0, +0xa4, +0xa, +0x9, +0xa4, +0xa, +0xb, +0x14, +0x34, +0x49, +0x51, +0x43, +0x52, +0x9, +0x8, +0x50, +0x52, +0x52, +0x30, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x0, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8a, +0x50, +0x52, +0x52, +0x30, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x7b, +0x68, +0xa, +0xf, +0x0, +0x50, +0x52, +0x52, +0x49, +0xa4, +0x50, +0x52, +0x52, +0x30, +0x5b, +0x82, +0x4c, +0x7, +0x4c, +0x4e, +0x4b, +0x41, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x41, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x41, +0xa, +0x80, +0x50, +0x52, +0x51, +0x41, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x41, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x41, +0x5b, +0x82, +0x4c, +0x7, +0x4c, +0x4e, +0x4b, +0x42, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x1, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x42, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x42, +0xa, +0x80, +0x50, +0x52, +0x51, +0x42, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x42, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x42, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x43, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x2, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x43, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x43, +0xa, +0x80, +0x50, +0x52, +0x51, +0x43, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x43, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x43, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x3, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x44, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x44, +0xa, +0x80, +0x50, +0x52, +0x51, +0x44, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x44, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x44, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x45, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x4, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x45, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x45, +0xa, +0x80, +0x50, +0x52, +0x51, +0x45, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x45, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x45, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x46, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x5, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x46, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x46, +0xa, +0x80, +0x50, +0x52, +0x51, +0x46, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x46, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x46, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x47, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x6, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x47, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x47, +0xa, +0x80, +0x50, +0x52, +0x51, +0x47, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x47, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x47, +0x5b, +0x82, +0x4d, +0x7, +0x4c, +0x4e, +0x4b, +0x48, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xa, +0x7, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0x16, +0xa, +0x13, +0x89, +0xe, +0x0, +0x9, +0x3, +0x5, +0x0, +0x0, +0x0, +0xa, +0x0, +0x0, +0x0, +0xb, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0xf, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa4, +0x49, +0x51, +0x53, +0x54, +0x50, +0x52, +0x51, +0x48, +0x14, +0x11, +0x5f, +0x44, +0x49, +0x53, +0x0, +0x7d, +0x50, +0x52, +0x51, +0x48, +0xa, +0x80, +0x50, +0x52, +0x51, +0x48, +0x14, +0xf, +0x5f, +0x43, +0x52, +0x53, +0x0, +0xa4, +0x49, +0x51, +0x43, +0x52, +0x50, +0x52, +0x51, +0x48, +0x14, +0x17, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x8a, +0x68, +0xa, +0x5, +0x50, +0x52, +0x52, +0x49, +0x70, +0x50, +0x52, +0x52, +0x49, +0x50, +0x52, +0x51, +0x48, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x41, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x10, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x10, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x42, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x11, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x11, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x43, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x12, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x12, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x13, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x13, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x45, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x14, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x14, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x46, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x15, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x15, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x47, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x16, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x16, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x5b, +0x82, +0x45, +0x4, +0x47, +0x53, +0x49, +0x48, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xc, +0x41, +0xd0, +0xc, +0xf, +0x8, +0x5f, +0x55, +0x49, +0x44, +0x0, +0x8, +0x5f, +0x50, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x17, +0x0, +0x0, +0x0, +0x79, +0x0, +0x8, +0x5f, +0x43, +0x52, +0x53, +0x11, +0xe, +0xa, +0xb, +0x89, +0x6, +0x0, +0x9, +0x1, +0x17, +0x0, +0x0, +0x0, +0x79, +0x0, +0x14, +0x6, +0x5f, +0x53, +0x52, +0x53, +0x1, +0x10, +0x4d, +0xc, +0x5f, +0x53, +0x42, +0x5f, +0x14, +0x35, +0x43, +0x50, +0x4d, +0x41, +0x1, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x68, +0x0, +0x60, +0x70, +0x11, +0xb, +0xa, +0x8, +0x0, +0x8, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x61, +0x70, +0x68, +0x88, +0x61, +0xa, +0x2, +0x0, +0x70, +0x68, +0x88, +0x61, +0xa, +0x3, +0x0, +0x70, +0x60, +0x88, +0x61, +0xa, +0x4, +0x0, +0xa4, +0x61, +0x14, +0x1a, +0x43, +0x50, +0x53, +0x54, +0x1, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x68, +0x0, +0x60, +0xa0, +0x5, +0x60, +0xa4, +0xa, +0xf, +0xa1, +0x3, +0xa4, +0x0, +0x14, +0xa, +0x43, +0x50, +0x45, +0x4a, +0x2, +0x5b, +0x22, +0xa, +0xc8, +0x14, +0x4a, +0x6, +0x50, +0x52, +0x53, +0x43, +0x0, +0x70, +0x50, +0x52, +0x53, +0x5f, +0x65, +0x70, +0x0, +0x62, +0x70, +0x0, +0x60, +0xa2, +0x46, +0x5, +0x95, +0x60, +0x87, +0x43, +0x50, +0x4f, +0x4e, +0x70, +0x83, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x60, +0x0, +0x61, +0xa0, +0xa, +0x7b, +0x60, +0xa, +0x7, +0x0, +0x7a, +0x62, +0x1, +0x62, +0xa1, +0xc, +0x70, +0x83, +0x88, +0x65, +0x7a, +0x60, +0xa, +0x3, +0x0, +0x0, +0x62, +0x70, +0x7b, +0x62, +0x1, +0x0, +0x63, +0xa0, +0x22, +0x92, +0x93, +0x61, +0x63, +0x70, +0x63, +0x88, +0x43, +0x50, +0x4f, +0x4e, +0x60, +0x0, +0xa0, +0xa, +0x93, +0x63, +0x1, +0x4e, +0x54, +0x46, +0x59, +0x60, +0x1, +0xa1, +0x8, +0x4e, +0x54, +0x46, +0x59, +0x60, +0xa, +0x3, +0x75, +0x60, +0x10, +0x44, +0x2a, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x5b, +0x82, +0x47, +0x29, +0x4d, +0x48, +0x50, +0x44, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x50, +0x4e, +0x50, +0x30, +0x41, +0x30, +0x36, +0x0, +0x8, +0x5f, +0x55, +0x49, +0x44, +0xd, +0x4d, +0x65, +0x6d, +0x6f, +0x72, +0x79, +0x20, +0x68, +0x6f, +0x74, +0x70, +0x6c, +0x75, +0x67, +0x20, +0x72, +0x65, +0x73, +0x6f, +0x75, +0x72, +0x63, +0x65, +0x73, +0x0, +0x14, +0x13, +0x5f, +0x53, +0x54, +0x41, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0xa4, +0xa, +0xb, +0x5b, +0x1, +0x4d, +0x4c, +0x43, +0x4b, +0x0, +0x14, +0x4a, +0x4, +0x4d, +0x53, +0x43, +0x4e, +0x0, +0xa0, +0x9, +0x93, +0x4d, +0x44, +0x4e, +0x52, +0x0, +0xa4, +0x0, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0xa2, +0x25, +0x95, +0x60, +0x4d, +0x44, +0x4e, +0x52, +0x70, +0x60, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0x13, +0x93, +0x4d, +0x49, +0x4e, +0x53, +0x1, +0x4d, +0x54, +0x46, +0x59, +0x60, +0x1, +0x70, +0x1, +0x4d, +0x49, +0x4e, +0x53, +0x72, +0x60, +0x1, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x1, +0x14, +0x2d, +0x4d, +0x52, +0x53, +0x54, +0x1, +0x70, +0x0, +0x60, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0xa0, +0xb, +0x93, +0x4d, +0x45, +0x53, +0x5f, +0x1, +0x70, +0xa, +0xf, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x41, +0x18, +0x4d, +0x43, +0x52, +0x53, +0x9, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x8, +0x4d, +0x52, +0x36, +0x34, +0x11, +0x33, +0xa, +0x30, +0x8a, +0x2b, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0xe, +0x4d, +0x49, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x12, +0x4d, +0x49, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x26, +0x4c, +0x45, +0x4e, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x2a, +0x4c, +0x45, +0x4e, +0x48, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x16, +0x4d, +0x41, +0x58, +0x4c, +0x8a, +0x4d, +0x52, +0x36, +0x34, +0xa, +0x1a, +0x4d, +0x41, +0x58, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x48, +0x4d, +0x49, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x42, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x70, +0x4d, +0x52, +0x4c, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x70, +0x4d, +0x52, +0x4c, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x4c, +0x4d, +0x41, +0x58, +0x4c, +0x72, +0x4d, +0x49, +0x4e, +0x48, +0x4c, +0x45, +0x4e, +0x48, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x14, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x49, +0x4e, +0x4c, +0x72, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0xa0, +0x11, +0x95, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x74, +0x4d, +0x41, +0x58, +0x48, +0x1, +0x4d, +0x41, +0x58, +0x48, +0x74, +0x4d, +0x41, +0x58, +0x4c, +0x1, +0x4d, +0x41, +0x58, +0x4c, +0xa0, +0x44, +0x7, +0x93, +0x4d, +0x41, +0x58, +0x48, +0x0, +0x8, +0x4d, +0x52, +0x33, +0x32, +0x11, +0x1f, +0xa, +0x1c, +0x87, +0x17, +0x0, +0x0, +0xc, +0x3, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0x0, +0xfe, +0xff, +0xff, +0xff, +0x0, +0x0, +0x0, +0x0, +0xff, +0xff, +0xff, +0xff, +0x79, +0x0, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xa, +0x4d, +0x49, +0x4e, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0xe, +0x4d, +0x41, +0x58, +0x5f, +0x8a, +0x4d, +0x52, +0x33, +0x32, +0xa, +0x16, +0x4c, +0x45, +0x4e, +0x5f, +0x70, +0x4d, +0x49, +0x4e, +0x4c, +0x4d, +0x49, +0x4e, +0x5f, +0x70, +0x4d, +0x41, +0x58, +0x4c, +0x4d, +0x41, +0x58, +0x5f, +0x70, +0x4c, +0x45, +0x4e, +0x4c, +0x4c, +0x45, +0x4e, +0x5f, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x33, +0x32, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x4d, +0x52, +0x36, +0x34, +0x14, +0x24, +0x4d, +0x50, +0x58, +0x4d, +0x1, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x4d, +0x50, +0x58, +0x5f, +0x60, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0xa4, +0x60, +0x14, +0x28, +0x4d, +0x4f, +0x53, +0x54, +0x4, +0x5b, +0x23, +0x4d, +0x4c, +0x43, +0x4b, +0xff, +0xff, +0x70, +0x99, +0x68, +0x0, +0x4d, +0x53, +0x45, +0x4c, +0x70, +0x69, +0x4d, +0x4f, +0x45, +0x56, +0x70, +0x6a, +0x4d, +0x4f, +0x53, +0x43, +0x5b, +0x27, +0x4d, +0x4c, +0x43, +0x4b, +0x10, +0x42, +0xa, +0x5f, +0x47, +0x50, +0x45, +0x8, +0x5f, +0x48, +0x49, +0x44, +0xd, +0x41, +0x43, +0x50, +0x49, +0x30, +0x30, +0x30, +0x36, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x30, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x31, +0x0, +0x14, +0x10, +0x5f, +0x45, +0x30, +0x32, +0x0, +0x5c, +0x2e, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x52, +0x53, +0x43, +0x14, +0x19, +0x5f, +0x45, +0x30, +0x33, +0x0, +0x5c, +0x2f, +0x4, +0x5f, +0x53, +0x42, +0x5f, +0x50, +0x43, +0x49, +0x30, +0x4d, +0x48, +0x50, +0x44, +0x4d, +0x53, +0x43, +0x4e, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x34, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x35, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x36, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x37, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x38, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x39, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x41, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x42, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x43, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x44, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x45, +0x0, +0x14, +0x6, +0x5f, +0x4c, +0x30, +0x46, +0x0 +}; diff --git a/qemu/hw/i386/smbios.c b/qemu/hw/i386/smbios.c new file mode 100644 index 000000000..1341e0234 --- /dev/null +++ b/qemu/hw/i386/smbios.c @@ -0,0 +1,1102 @@ +/* + * SMBIOS Support + * + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2013 Red Hat, Inc. + * + * Authors: + * Alex Williamson <alex.williamson@hp.com> + * Markus Armbruster <armbru@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" +#include "sysemu/cpus.h" +#include "hw/i386/pc.h" +#include "hw/i386/smbios.h" +#include "hw/loader.h" + + +/* legacy structures and constants for <= 2.0 machines */ +struct smbios_header { + uint16_t length; + uint8_t type; +} QEMU_PACKED; + +struct smbios_field { + struct smbios_header header; + uint8_t type; + uint16_t offset; + uint8_t data[]; +} QEMU_PACKED; + +struct smbios_table { + struct smbios_header header; + uint8_t data[]; +} QEMU_PACKED; + +#define SMBIOS_FIELD_ENTRY 0 +#define SMBIOS_TABLE_ENTRY 1 + +static uint8_t *smbios_entries; +static size_t smbios_entries_len; +static bool smbios_legacy = true; +static bool smbios_uuid_encoded = true; +/* end: legacy structures & constants for <= 2.0 machines */ + + +static uint8_t *smbios_tables; +static size_t smbios_tables_len; +static unsigned smbios_table_max; +static unsigned smbios_table_cnt; +static struct smbios_entry_point ep; + +static int smbios_type4_count = 0; +static bool smbios_immutable; +static bool smbios_have_defaults; +static uint32_t smbios_cpuid_version, smbios_cpuid_features, smbios_smp_sockets; + +static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); +static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); + +static struct { + const char *vendor, *version, *date; + bool have_major_minor, uefi; + uint8_t major, minor; +} type0; + +static struct { + const char *manufacturer, *product, *version, *serial, *sku, *family; + /* uuid is in qemu_uuid[] */ +} type1; + +static struct { + const char *manufacturer, *product, *version, *serial, *asset, *location; +} type2; + +static struct { + const char *manufacturer, *version, *serial, *asset, *sku; +} type3; + +static struct { + const char *sock_pfx, *manufacturer, *version, *serial, *asset, *part; +} type4; + +static struct { + const char *loc_pfx, *bank, *manufacturer, *serial, *asset, *part; + uint16_t speed; +} type17; + +static QemuOptsList qemu_smbios_opts = { + .name = "smbios", + .head = QTAILQ_HEAD_INITIALIZER(qemu_smbios_opts.head), + .desc = { + /* + * no elements => accept any params + * validation will happen later + */ + { /* end of list */ } + } +}; + +static const QemuOptDesc qemu_smbios_file_opts[] = { + { + .name = "file", + .type = QEMU_OPT_STRING, + .help = "binary file containing an SMBIOS element", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type0_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "vendor", + .type = QEMU_OPT_STRING, + .help = "vendor name", + },{ + .name = "version", + .type = QEMU_OPT_STRING, + .help = "version number", + },{ + .name = "date", + .type = QEMU_OPT_STRING, + .help = "release date", + },{ + .name = "release", + .type = QEMU_OPT_STRING, + .help = "revision number", + },{ + .name = "uefi", + .type = QEMU_OPT_BOOL, + .help = "uefi support", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type1_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "manufacturer", + .type = QEMU_OPT_STRING, + .help = "manufacturer name", + },{ + .name = "product", + .type = QEMU_OPT_STRING, + .help = "product name", + },{ + .name = "version", + .type = QEMU_OPT_STRING, + .help = "version number", + },{ + .name = "serial", + .type = QEMU_OPT_STRING, + .help = "serial number", + },{ + .name = "uuid", + .type = QEMU_OPT_STRING, + .help = "UUID", + },{ + .name = "sku", + .type = QEMU_OPT_STRING, + .help = "SKU number", + },{ + .name = "family", + .type = QEMU_OPT_STRING, + .help = "family name", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type2_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "manufacturer", + .type = QEMU_OPT_STRING, + .help = "manufacturer name", + },{ + .name = "product", + .type = QEMU_OPT_STRING, + .help = "product name", + },{ + .name = "version", + .type = QEMU_OPT_STRING, + .help = "version number", + },{ + .name = "serial", + .type = QEMU_OPT_STRING, + .help = "serial number", + },{ + .name = "asset", + .type = QEMU_OPT_STRING, + .help = "asset tag number", + },{ + .name = "location", + .type = QEMU_OPT_STRING, + .help = "location in chassis", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type3_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "manufacturer", + .type = QEMU_OPT_STRING, + .help = "manufacturer name", + },{ + .name = "version", + .type = QEMU_OPT_STRING, + .help = "version number", + },{ + .name = "serial", + .type = QEMU_OPT_STRING, + .help = "serial number", + },{ + .name = "asset", + .type = QEMU_OPT_STRING, + .help = "asset tag number", + },{ + .name = "sku", + .type = QEMU_OPT_STRING, + .help = "SKU number", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type4_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "sock_pfx", + .type = QEMU_OPT_STRING, + .help = "socket designation string prefix", + },{ + .name = "manufacturer", + .type = QEMU_OPT_STRING, + .help = "manufacturer name", + },{ + .name = "version", + .type = QEMU_OPT_STRING, + .help = "version number", + },{ + .name = "serial", + .type = QEMU_OPT_STRING, + .help = "serial number", + },{ + .name = "asset", + .type = QEMU_OPT_STRING, + .help = "asset tag number", + },{ + .name = "part", + .type = QEMU_OPT_STRING, + .help = "part number", + }, + { /* end of list */ } +}; + +static const QemuOptDesc qemu_smbios_type17_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "loc_pfx", + .type = QEMU_OPT_STRING, + .help = "device locator string prefix", + },{ + .name = "bank", + .type = QEMU_OPT_STRING, + .help = "bank locator string", + },{ + .name = "manufacturer", + .type = QEMU_OPT_STRING, + .help = "manufacturer name", + },{ + .name = "serial", + .type = QEMU_OPT_STRING, + .help = "serial number", + },{ + .name = "asset", + .type = QEMU_OPT_STRING, + .help = "asset tag number", + },{ + .name = "part", + .type = QEMU_OPT_STRING, + .help = "part number", + },{ + .name = "speed", + .type = QEMU_OPT_NUMBER, + .help = "maximum capable speed", + }, + { /* end of list */ } +}; + +static void smbios_register_config(void) +{ + qemu_add_opts(&qemu_smbios_opts); +} + +machine_init(smbios_register_config); + +static void smbios_validate_table(void) +{ + uint32_t expect_t4_count = smbios_legacy ? smp_cpus : smbios_smp_sockets; + + if (smbios_type4_count && smbios_type4_count != expect_t4_count) { + error_report("Expected %d SMBIOS Type 4 tables, got %d instead", + expect_t4_count, smbios_type4_count); + exit(1); + } +} + + +/* legacy setup functions for <= 2.0 machines */ +static void smbios_add_field(int type, int offset, const void *data, size_t len) +{ + struct smbios_field *field; + + if (!smbios_entries) { + smbios_entries_len = sizeof(uint16_t); + smbios_entries = g_malloc0(smbios_entries_len); + } + smbios_entries = g_realloc(smbios_entries, smbios_entries_len + + sizeof(*field) + len); + field = (struct smbios_field *)(smbios_entries + smbios_entries_len); + field->header.type = SMBIOS_FIELD_ENTRY; + field->header.length = cpu_to_le16(sizeof(*field) + len); + + field->type = type; + field->offset = cpu_to_le16(offset); + memcpy(field->data, data, len); + + smbios_entries_len += sizeof(*field) + len; + (*(uint16_t *)smbios_entries) = + cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +} + +static void smbios_maybe_add_str(int type, int offset, const char *data) +{ + if (data) { + smbios_add_field(type, offset, data, strlen(data) + 1); + } +} + +static void smbios_build_type_0_fields(void) +{ + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), + type0.vendor); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), + type0.version); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, + bios_release_date_str), + type0.date); + if (type0.have_major_minor) { + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_major_release), + &type0.major, 1); + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_minor_release), + &type0.minor, 1); + } +} + +static void smbios_build_type_1_fields(void) +{ + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), + type1.manufacturer); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), + type1.product); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), + type1.version); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), + type1.serial); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), + type1.sku); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), + type1.family); + if (qemu_uuid_set) { + /* We don't encode the UUID in the "wire format" here because this + * function is for legacy mode and needs to keep the guest ABI, and + * because we don't know what's the SMBIOS version advertised by the + * BIOS. + */ + smbios_add_field(1, offsetof(struct smbios_type_1, uuid), + qemu_uuid, 16); + } +} + +uint8_t *smbios_get_table_legacy(size_t *length) +{ + if (!smbios_legacy) { + *length = 0; + return NULL; + } + + if (!smbios_immutable) { + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); + smbios_validate_table(); + smbios_immutable = true; + } + *length = smbios_entries_len; + return smbios_entries; +} +/* end: legacy setup functions for <= 2.0 machines */ + + +static bool smbios_skip_table(uint8_t type, bool required_table) +{ + if (test_bit(type, have_binfile_bitmap)) { + return true; /* user provided their own binary blob(s) */ + } + if (test_bit(type, have_fields_bitmap)) { + return false; /* user provided fields via command line */ + } + if (smbios_have_defaults && required_table) { + return false; /* we're building tables, and this one's required */ + } + return true; +} + +#define SMBIOS_BUILD_TABLE_PRE(tbl_type, tbl_handle, tbl_required) \ + struct smbios_type_##tbl_type *t; \ + size_t t_off; /* table offset into smbios_tables */ \ + int str_index = 0; \ + do { \ + /* should we skip building this table ? */ \ + if (smbios_skip_table(tbl_type, tbl_required)) { \ + return; \ + } \ + \ + /* use offset of table t within smbios_tables */ \ + /* (pointer must be updated after each realloc) */ \ + t_off = smbios_tables_len; \ + smbios_tables_len += sizeof(*t); \ + smbios_tables = g_realloc(smbios_tables, smbios_tables_len); \ + t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ + \ + t->header.type = tbl_type; \ + t->header.length = sizeof(*t); \ + t->header.handle = cpu_to_le16(tbl_handle); \ + } while (0) + +#define SMBIOS_TABLE_SET_STR(tbl_type, field, value) \ + do { \ + int len = (value != NULL) ? strlen(value) + 1 : 0; \ + if (len > 1) { \ + smbios_tables = g_realloc(smbios_tables, \ + smbios_tables_len + len); \ + memcpy(smbios_tables + smbios_tables_len, value, len); \ + smbios_tables_len += len; \ + /* update pointer post-realloc */ \ + t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ + t->field = ++str_index; \ + } else { \ + t->field = 0; \ + } \ + } while (0) + +#define SMBIOS_BUILD_TABLE_POST \ + do { \ + size_t term_cnt, t_size; \ + \ + /* add '\0' terminator (add two if no strings defined) */ \ + term_cnt = (str_index == 0) ? 2 : 1; \ + smbios_tables = g_realloc(smbios_tables, \ + smbios_tables_len + term_cnt); \ + memset(smbios_tables + smbios_tables_len, 0, term_cnt); \ + smbios_tables_len += term_cnt; \ + \ + /* update smbios max. element size */ \ + t_size = smbios_tables_len - t_off; \ + if (t_size > smbios_table_max) { \ + smbios_table_max = t_size; \ + } \ + \ + /* update smbios element count */ \ + smbios_table_cnt++; \ + } while (0) + +static void smbios_build_type_0_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(0, 0x000, false); /* optional, leave up to BIOS */ + + SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor); + SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version); + + t->bios_starting_address_segment = cpu_to_le16(0xE800); /* from SeaBIOS */ + + SMBIOS_TABLE_SET_STR(0, bios_release_date_str, type0.date); + + t->bios_rom_size = 0; /* hardcoded in SeaBIOS with FIXME comment */ + + t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */ + t->bios_characteristics_extension_bytes[0] = 0; + t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */ + if (type0.uefi) { + t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */ + } + + if (type0.have_major_minor) { + t->system_bios_major_release = type0.major; + t->system_bios_minor_release = type0.minor; + } else { + t->system_bios_major_release = 0; + t->system_bios_minor_release = 0; + } + + /* hardcoded in SeaBIOS */ + t->embedded_controller_major_release = 0xFF; + t->embedded_controller_minor_release = 0xFF; + + SMBIOS_BUILD_TABLE_POST; +} + +/* Encode UUID from the big endian encoding described on RFC4122 to the wire + * format specified by SMBIOS version 2.6. + */ +static void smbios_encode_uuid(struct smbios_uuid *uuid, const uint8_t *buf) +{ + memcpy(uuid, buf, 16); + if (smbios_uuid_encoded) { + uuid->time_low = bswap32(uuid->time_low); + uuid->time_mid = bswap16(uuid->time_mid); + uuid->time_hi_and_version = bswap16(uuid->time_hi_and_version); + } +} + +static void smbios_build_type_1_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(1, 0x100, true); /* required */ + + SMBIOS_TABLE_SET_STR(1, manufacturer_str, type1.manufacturer); + SMBIOS_TABLE_SET_STR(1, product_name_str, type1.product); + SMBIOS_TABLE_SET_STR(1, version_str, type1.version); + SMBIOS_TABLE_SET_STR(1, serial_number_str, type1.serial); + if (qemu_uuid_set) { + smbios_encode_uuid(&t->uuid, qemu_uuid); + } else { + memset(&t->uuid, 0, 16); + } + t->wake_up_type = 0x06; /* power switch */ + SMBIOS_TABLE_SET_STR(1, sku_number_str, type1.sku); + SMBIOS_TABLE_SET_STR(1, family_str, type1.family); + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_2_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); + SMBIOS_TABLE_SET_STR(2, version_str, type2.version); + SMBIOS_TABLE_SET_STR(2, serial_number_str, type2.serial); + SMBIOS_TABLE_SET_STR(2, asset_tag_number_str, type2.asset); + t->feature_flags = 0x01; /* Motherboard */ + SMBIOS_TABLE_SET_STR(2, location_str, type2.location); + t->chassis_handle = cpu_to_le16(0x300); /* Type 3 (System enclosure) */ + t->board_type = 0x0A; /* Motherboard */ + t->contained_element_count = 0; + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_3_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(3, 0x300, true); /* required */ + + SMBIOS_TABLE_SET_STR(3, manufacturer_str, type3.manufacturer); + t->type = 0x01; /* Other */ + SMBIOS_TABLE_SET_STR(3, version_str, type3.version); + SMBIOS_TABLE_SET_STR(3, serial_number_str, type3.serial); + SMBIOS_TABLE_SET_STR(3, asset_tag_number_str, type3.asset); + t->boot_up_state = 0x03; /* Safe */ + t->power_supply_state = 0x03; /* Safe */ + t->thermal_state = 0x03; /* Safe */ + t->security_status = 0x02; /* Unknown */ + t->oem_defined = cpu_to_le32(0); + t->height = 0; + t->number_of_power_cords = 0; + t->contained_element_count = 0; + SMBIOS_TABLE_SET_STR(3, sku_number_str, type3.sku); + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_4_table(unsigned instance) +{ + char sock_str[128]; + + SMBIOS_BUILD_TABLE_PRE(4, 0x400 + instance, true); /* required */ + + snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance); + SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str); + t->processor_type = 0x03; /* CPU */ + t->processor_family = 0x01; /* Other */ + SMBIOS_TABLE_SET_STR(4, processor_manufacturer_str, type4.manufacturer); + t->processor_id[0] = cpu_to_le32(smbios_cpuid_version); + t->processor_id[1] = cpu_to_le32(smbios_cpuid_features); + SMBIOS_TABLE_SET_STR(4, processor_version_str, type4.version); + t->voltage = 0; + t->external_clock = cpu_to_le16(0); /* Unknown */ + /* SVVP requires max_speed and current_speed to not be unknown. */ + t->max_speed = cpu_to_le16(2000); /* 2000 MHz */ + t->current_speed = cpu_to_le16(2000); /* 2000 MHz */ + t->status = 0x41; /* Socket populated, CPU enabled */ + t->processor_upgrade = 0x01; /* Other */ + t->l1_cache_handle = cpu_to_le16(0xFFFF); /* N/A */ + t->l2_cache_handle = cpu_to_le16(0xFFFF); /* N/A */ + t->l3_cache_handle = cpu_to_le16(0xFFFF); /* N/A */ + SMBIOS_TABLE_SET_STR(4, serial_number_str, type4.serial); + SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset); + SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part); + t->core_count = t->core_enabled = smp_cores; + t->thread_count = smp_threads; + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ + t->processor_family2 = cpu_to_le16(0x01); /* Other */ + + SMBIOS_BUILD_TABLE_POST; + smbios_type4_count++; +} + +#define ONE_KB ((ram_addr_t)1 << 10) +#define ONE_MB ((ram_addr_t)1 << 20) +#define ONE_GB ((ram_addr_t)1 << 30) + +#define MAX_T16_STD_SZ 0x80000000 /* 2T in Kilobytes */ + +static void smbios_build_type_16_table(unsigned dimm_cnt) +{ + uint64_t size_kb; + + SMBIOS_BUILD_TABLE_PRE(16, 0x1000, true); /* required */ + + t->location = 0x01; /* Other */ + t->use = 0x03; /* System memory */ + t->error_correction = 0x06; /* Multi-bit ECC (for Microsoft, per SeaBIOS) */ + size_kb = QEMU_ALIGN_UP(ram_size, ONE_KB) / ONE_KB; + if (size_kb < MAX_T16_STD_SZ) { + t->maximum_capacity = cpu_to_le32(size_kb); + t->extended_maximum_capacity = cpu_to_le64(0); + } else { + t->maximum_capacity = cpu_to_le32(MAX_T16_STD_SZ); + t->extended_maximum_capacity = cpu_to_le64(ram_size); + } + t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */ + t->number_of_memory_devices = cpu_to_le16(dimm_cnt); + + SMBIOS_BUILD_TABLE_POST; +} + +#define MAX_T17_STD_SZ 0x7FFF /* (32G - 1M), in Megabytes */ +#define MAX_T17_EXT_SZ 0x80000000 /* 2P, in Megabytes */ + +static void smbios_build_type_17_table(unsigned instance, uint64_t size) +{ + char loc_str[128]; + uint64_t size_mb; + + SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */ + + t->physical_memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */ + t->memory_error_information_handle = cpu_to_le16(0xFFFE); /* Not provided */ + t->total_width = cpu_to_le16(0xFFFF); /* Unknown */ + t->data_width = cpu_to_le16(0xFFFF); /* Unknown */ + size_mb = QEMU_ALIGN_UP(size, ONE_MB) / ONE_MB; + if (size_mb < MAX_T17_STD_SZ) { + t->size = cpu_to_le16(size_mb); + t->extended_size = cpu_to_le32(0); + } else { + assert(size_mb < MAX_T17_EXT_SZ); + t->size = cpu_to_le16(MAX_T17_STD_SZ); + t->extended_size = cpu_to_le32(size_mb); + } + t->form_factor = 0x09; /* DIMM */ + t->device_set = 0; /* Not in a set */ + snprintf(loc_str, sizeof(loc_str), "%s %d", type17.loc_pfx, instance); + SMBIOS_TABLE_SET_STR(17, device_locator_str, loc_str); + SMBIOS_TABLE_SET_STR(17, bank_locator_str, type17.bank); + t->memory_type = 0x07; /* RAM */ + t->type_detail = cpu_to_le16(0x02); /* Other */ + t->speed = cpu_to_le16(type17.speed); + SMBIOS_TABLE_SET_STR(17, manufacturer_str, type17.manufacturer); + SMBIOS_TABLE_SET_STR(17, serial_number_str, type17.serial); + SMBIOS_TABLE_SET_STR(17, asset_tag_number_str, type17.asset); + SMBIOS_TABLE_SET_STR(17, part_number_str, type17.part); + t->attributes = 0; /* Unknown */ + t->configured_clock_speed = t->speed; /* reuse value for max speed */ + t->minimum_voltage = cpu_to_le16(0); /* Unknown */ + t->maximum_voltage = cpu_to_le16(0); /* Unknown */ + t->configured_voltage = cpu_to_le16(0); /* Unknown */ + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_19_table(unsigned instance, + uint64_t start, uint64_t size) +{ + uint64_t end, start_kb, end_kb; + + SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */ + + end = start + size - 1; + assert(end > start); + start_kb = start / ONE_KB; + end_kb = end / ONE_KB; + if (start_kb < UINT32_MAX && end_kb < UINT32_MAX) { + t->starting_address = cpu_to_le32(start_kb); + t->ending_address = cpu_to_le32(end_kb); + t->extended_starting_address = + t->extended_ending_address = cpu_to_le64(0); + } else { + t->starting_address = t->ending_address = cpu_to_le32(UINT32_MAX); + t->extended_starting_address = cpu_to_le64(start); + t->extended_ending_address = cpu_to_le64(end); + } + t->memory_array_handle = cpu_to_le16(0x1000); /* Type 16 above */ + t->partition_width = 1; /* One device per row */ + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_32_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(32, 0x2000, true); /* required */ + + memset(t->reserved, 0, 6); + t->boot_status = 0; /* No errors detected */ + + SMBIOS_BUILD_TABLE_POST; +} + +static void smbios_build_type_127_table(void) +{ + SMBIOS_BUILD_TABLE_PRE(127, 0x7F00, true); /* required */ + SMBIOS_BUILD_TABLE_POST; +} + +void smbios_set_cpuid(uint32_t version, uint32_t features) +{ + smbios_cpuid_version = version; + smbios_cpuid_features = features; +} + +#define SMBIOS_SET_DEFAULT(field, value) \ + if (!field) { \ + field = value; \ + } + +void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, + bool uuid_encoded) +{ + smbios_have_defaults = true; + smbios_legacy = legacy_mode; + smbios_uuid_encoded = uuid_encoded; + + /* drop unwanted version of command-line file blob(s) */ + if (smbios_legacy) { + g_free(smbios_tables); + /* in legacy mode, also complain if fields were given for types > 1 */ + if (find_next_bit(have_fields_bitmap, + SMBIOS_MAX_TYPE+1, 2) < SMBIOS_MAX_TYPE+1) { + error_report("can't process fields for smbios " + "types > 1 on machine versions < 2.1!"); + exit(1); + } + } else { + g_free(smbios_entries); + } + + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type2.product, product); + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); + SMBIOS_SET_DEFAULT(type4.sock_pfx, "CPU"); + SMBIOS_SET_DEFAULT(type4.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type4.version, version); + SMBIOS_SET_DEFAULT(type17.loc_pfx, "DIMM"); + SMBIOS_SET_DEFAULT(type17.manufacturer, manufacturer); +} + +static void smbios_entry_point_setup(void) +{ + memcpy(ep.anchor_string, "_SM_", 4); + memcpy(ep.intermediate_anchor_string, "_DMI_", 5); + ep.length = sizeof(struct smbios_entry_point); + ep.entry_point_revision = 0; /* formatted_area reserved, per spec v2.1+ */ + memset(ep.formatted_area, 0, 5); + + /* compliant with smbios spec v2.8 */ + ep.smbios_major_version = 2; + ep.smbios_minor_version = 8; + ep.smbios_bcd_revision = 0x28; + + /* set during table construction, but BIOS may override: */ + ep.structure_table_length = cpu_to_le16(smbios_tables_len); + ep.max_structure_size = cpu_to_le16(smbios_table_max); + ep.number_of_structures = cpu_to_le16(smbios_table_cnt); + + /* BIOS must recalculate: */ + ep.checksum = 0; + ep.intermediate_checksum = 0; + ep.structure_table_address = cpu_to_le32(0); +} + +void smbios_get_tables(uint8_t **tables, size_t *tables_len, + uint8_t **anchor, size_t *anchor_len) +{ + unsigned i, dimm_cnt, instance; + + if (smbios_legacy) { + *tables = *anchor = NULL; + *tables_len = *anchor_len = 0; + return; + } + + if (!smbios_immutable) { + smbios_build_type_0_table(); + smbios_build_type_1_table(); + smbios_build_type_2_table(); + smbios_build_type_3_table(); + + smbios_smp_sockets = DIV_ROUND_UP(smp_cpus, smp_cores * smp_threads); + assert(smbios_smp_sockets >= 1); + + for (i = 0; i < smbios_smp_sockets; i++) { + smbios_build_type_4_table(i); + } + +#define MAX_DIMM_SZ (16ll * ONE_GB) +#define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ + : ((ram_size - 1) % MAX_DIMM_SZ) + 1) + + dimm_cnt = QEMU_ALIGN_UP(ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ; + + smbios_build_type_16_table(dimm_cnt); + + for (i = 0; i < dimm_cnt; i++) { + smbios_build_type_17_table(i, GET_DIMM_SZ); + } + + for (i = 0, instance = 0; i < e820_get_num_entries(); i++) { + uint64_t address, length; + if (e820_get_entry(i, E820_RAM, &address, &length)) { + smbios_build_type_19_table(instance++, address, length); + } + } + + smbios_build_type_32_table(); + smbios_build_type_127_table(); + + smbios_validate_table(); + smbios_entry_point_setup(); + smbios_immutable = true; + } + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; + *tables_len = smbios_tables_len; + *anchor = (uint8_t *)&ep; + *anchor_len = sizeof(struct smbios_entry_point); +} + +static void save_opt(const char **dest, QemuOpts *opts, const char *name) +{ + const char *val = qemu_opt_get(opts, name); + + if (val) { + *dest = val; + } +} + +void smbios_entry_add(QemuOpts *opts) +{ + Error *local_err = NULL; + const char *val; + + assert(!smbios_immutable); + + val = qemu_opt_get(opts, "file"); + if (val) { + struct smbios_structure_header *header; + int size; + struct smbios_table *table; /* legacy mode only */ + + qemu_opts_validate(opts, qemu_smbios_file_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + + size = get_image_size(val); + if (size == -1 || size < sizeof(struct smbios_structure_header)) { + error_report("Cannot read SMBIOS file %s", val); + exit(1); + } + + /* + * NOTE: standard double '\0' terminator expected, per smbios spec. + * (except in legacy mode, where the second '\0' is implicit and + * will be inserted by the BIOS). + */ + smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size); + header = (struct smbios_structure_header *)(smbios_tables + + smbios_tables_len); + + if (load_image(val, (uint8_t *)header) != size) { + error_report("Failed to load SMBIOS file %s", val); + exit(1); + } + + if (test_bit(header->type, have_fields_bitmap)) { + error_report("can't load type %d struct, fields already specified!", + header->type); + exit(1); + } + set_bit(header->type, have_binfile_bitmap); + + if (header->type == 4) { + smbios_type4_count++; + } + + smbios_tables_len += size; + if (size > smbios_table_max) { + smbios_table_max = size; + } + smbios_table_cnt++; + + /* add a copy of the newly loaded blob to legacy smbios_entries */ + /* NOTE: This code runs before smbios_set_defaults(), so we don't + * yet know which mode (legacy vs. aggregate-table) will be + * required. We therefore add the binary blob to both legacy + * (smbios_entries) and aggregate (smbios_tables) tables, and + * delete the one we don't need from smbios_set_defaults(), + * once we know which machine version has been requested. + */ + if (!smbios_entries) { + smbios_entries_len = sizeof(uint16_t); + smbios_entries = g_malloc0(smbios_entries_len); + } + smbios_entries = g_realloc(smbios_entries, smbios_entries_len + + size + sizeof(*table)); + table = (struct smbios_table *)(smbios_entries + smbios_entries_len); + table->header.type = SMBIOS_TABLE_ENTRY; + table->header.length = cpu_to_le16(sizeof(*table) + size); + memcpy(table->data, header, size); + smbios_entries_len += sizeof(*table) + size; + (*(uint16_t *)smbios_entries) = + cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); + /* end: add a copy of the newly loaded blob to legacy smbios_entries */ + + return; + } + + val = qemu_opt_get(opts, "type"); + if (val) { + unsigned long type = strtoul(val, NULL, 0); + + if (type > SMBIOS_MAX_TYPE) { + error_report("out of range!"); + exit(1); + } + + if (test_bit(type, have_binfile_bitmap)) { + error_report("can't add fields, binary file already loaded!"); + exit(1); + } + set_bit(type, have_fields_bitmap); + + switch (type) { + case 0: + qemu_opts_validate(opts, qemu_smbios_type0_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type0.vendor, opts, "vendor"); + save_opt(&type0.version, opts, "version"); + save_opt(&type0.date, opts, "date"); + type0.uefi = qemu_opt_get_bool(opts, "uefi", false); + + val = qemu_opt_get(opts, "release"); + if (val) { + if (sscanf(val, "%hhu.%hhu", &type0.major, &type0.minor) != 2) { + error_report("Invalid release"); + exit(1); + } + type0.have_major_minor = true; + } + return; + case 1: + qemu_opts_validate(opts, qemu_smbios_type1_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type1.manufacturer, opts, "manufacturer"); + save_opt(&type1.product, opts, "product"); + save_opt(&type1.version, opts, "version"); + save_opt(&type1.serial, opts, "serial"); + save_opt(&type1.sku, opts, "sku"); + save_opt(&type1.family, opts, "family"); + + val = qemu_opt_get(opts, "uuid"); + if (val) { + if (qemu_uuid_parse(val, qemu_uuid) != 0) { + error_report("Invalid UUID"); + exit(1); + } + qemu_uuid_set = true; + } + return; + case 2: + qemu_opts_validate(opts, qemu_smbios_type2_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type2.manufacturer, opts, "manufacturer"); + save_opt(&type2.product, opts, "product"); + save_opt(&type2.version, opts, "version"); + save_opt(&type2.serial, opts, "serial"); + save_opt(&type2.asset, opts, "asset"); + save_opt(&type2.location, opts, "location"); + return; + case 3: + qemu_opts_validate(opts, qemu_smbios_type3_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type3.manufacturer, opts, "manufacturer"); + save_opt(&type3.version, opts, "version"); + save_opt(&type3.serial, opts, "serial"); + save_opt(&type3.asset, opts, "asset"); + save_opt(&type3.sku, opts, "sku"); + return; + case 4: + qemu_opts_validate(opts, qemu_smbios_type4_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type4.sock_pfx, opts, "sock_pfx"); + save_opt(&type4.manufacturer, opts, "manufacturer"); + save_opt(&type4.version, opts, "version"); + save_opt(&type4.serial, opts, "serial"); + save_opt(&type4.asset, opts, "asset"); + save_opt(&type4.part, opts, "part"); + return; + case 17: + qemu_opts_validate(opts, qemu_smbios_type17_opts, &local_err); + if (local_err) { + error_report_err(local_err); + exit(1); + } + save_opt(&type17.loc_pfx, opts, "loc_pfx"); + save_opt(&type17.bank, opts, "bank"); + save_opt(&type17.manufacturer, opts, "manufacturer"); + save_opt(&type17.serial, opts, "serial"); + save_opt(&type17.asset, opts, "asset"); + save_opt(&type17.part, opts, "part"); + type17.speed = qemu_opt_get_number(opts, "speed", 0); + return; + default: + error_report("Don't know how to build fields for SMBIOS type %ld", + type); + exit(1); + } + } + + error_report("Must specify type= or file="); + exit(1); +} diff --git a/qemu/hw/i386/xen/Makefile.objs b/qemu/hw/i386/xen/Makefile.objs new file mode 100644 index 000000000..801a68d32 --- /dev/null +++ b/qemu/hw/i386/xen/Makefile.objs @@ -0,0 +1 @@ +obj-y += xen_platform.o xen_apic.o xen_pvdevice.o diff --git a/qemu/hw/i386/xen/xen_apic.c b/qemu/hw/i386/xen/xen_apic.c new file mode 100644 index 000000000..f5acd6a09 --- /dev/null +++ b/qemu/hw/i386/xen/xen_apic.c @@ -0,0 +1,98 @@ +/* + * Xen basic APIC support + * + * Copyright (c) 2012 Citrix + * + * Authors: + * Wei Liu <wei.liu2@citrix.com> + * + * This work is licensed under the terms of the GNU GPL version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include "hw/i386/apic_internal.h" +#include "hw/pci/msi.h" +#include "hw/xen/xen.h" + +static uint64_t xen_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void xen_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + if (size != sizeof(uint32_t)) { + fprintf(stderr, "Xen: APIC write data size = %d, invalid\n", size); + return; + } + + xen_hvm_inject_msi(addr, data); +} + +static const MemoryRegionOps xen_apic_io_ops = { + .read = xen_apic_mem_read, + .write = xen_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void xen_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + s->vapic_control = 0; + memory_region_init_io(&s->io_memory, OBJECT(s), &xen_apic_io_ops, s, + "xen-apic-msi", APIC_SPACE_SIZE); + +#if defined(CONFIG_XEN_CTRL_INTERFACE_VERSION) \ + && CONFIG_XEN_CTRL_INTERFACE_VERSION >= 420 + msi_supported = true; +#endif +} + +static void xen_apic_set_base(APICCommonState *s, uint64_t val) +{ +} + +static void xen_apic_set_tpr(APICCommonState *s, uint8_t val) +{ +} + +static uint8_t xen_apic_get_tpr(APICCommonState *s) +{ + return 0; +} + +static void xen_apic_vapic_base_update(APICCommonState *s) +{ +} + +static void xen_apic_external_nmi(APICCommonState *s) +{ +} + +static void xen_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = xen_apic_realize; + k->set_base = xen_apic_set_base; + k->set_tpr = xen_apic_set_tpr; + k->get_tpr = xen_apic_get_tpr; + k->vapic_base_update = xen_apic_vapic_base_update; + k->external_nmi = xen_apic_external_nmi; +} + +static const TypeInfo xen_apic_info = { + .name = "xen-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = xen_apic_class_init, +}; + +static void xen_apic_register_types(void) +{ + type_register_static(&xen_apic_info); +} + +type_init(xen_apic_register_types) diff --git a/qemu/hw/i386/xen/xen_platform.c b/qemu/hw/i386/xen/xen_platform.c new file mode 100644 index 000000000..28b324a6f --- /dev/null +++ b/qemu/hw/i386/xen/xen_platform.c @@ -0,0 +1,450 @@ +/* + * XEN platform pci device, formerly known as the event channel device + * + * Copyright (c) 2003-2004 Intel Corp. + * Copyright (c) 2006 XenSource + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <assert.h> + +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "hw/ide.h" +#include "hw/pci/pci.h" +#include "hw/irq.h" +#include "hw/xen/xen_common.h" +#include "hw/xen/xen_backend.h" +#include "trace.h" +#include "exec/address-spaces.h" +#include "sysemu/block-backend.h" + +#include <xenguest.h> + +//#define DEBUG_PLATFORM + +#ifdef DEBUG_PLATFORM +#define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_platform: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */ + +typedef struct PCIXenPlatformState { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + MemoryRegion fixed_io; + MemoryRegion bar; + MemoryRegion mmio_bar; + uint8_t flags; /* used only for version_id == 2 */ + int drivers_blacklisted; + uint16_t driver_product_version; + + /* Log from guest drivers */ + char log_buffer[4096]; + int log_buffer_off; +} PCIXenPlatformState; + +#define TYPE_XEN_PLATFORM "xen-platform" +#define XEN_PLATFORM(obj) \ + OBJECT_CHECK(PCIXenPlatformState, (obj), TYPE_XEN_PLATFORM) + +#define XEN_PLATFORM_IOPORT 0x10 + +/* Send bytes to syslog */ +static void log_writeb(PCIXenPlatformState *s, char val) +{ + if (val == '\n' || s->log_buffer_off == sizeof(s->log_buffer) - 1) { + /* Flush buffer */ + s->log_buffer[s->log_buffer_off] = 0; + trace_xen_platform_log(s->log_buffer); + s->log_buffer_off = 0; + } else { + s->log_buffer[s->log_buffer_off++] = val; + } +} + +/* Xen Platform, Fixed IOPort */ +#define UNPLUG_ALL_IDE_DISKS 1 +#define UNPLUG_ALL_NICS 2 +#define UNPLUG_AUX_IDE_DISKS 4 + +static void unplug_nic(PCIBus *b, PCIDevice *d, void *o) +{ + /* We have to ignore passthrough devices */ + if (pci_get_word(d->config + PCI_CLASS_DEVICE) == + PCI_CLASS_NETWORK_ETHERNET + && strcmp(d->name, "xen-pci-passthrough") != 0) { + object_unparent(OBJECT(d)); + } +} + +static void pci_unplug_nics(PCIBus *bus) +{ + pci_for_each_device(bus, 0, unplug_nic, NULL); +} + +static void unplug_disks(PCIBus *b, PCIDevice *d, void *o) +{ + /* We have to ignore passthrough devices */ + if (pci_get_word(d->config + PCI_CLASS_DEVICE) == + PCI_CLASS_STORAGE_IDE + && strcmp(d->name, "xen-pci-passthrough") != 0) { + pci_piix3_xen_ide_unplug(DEVICE(d)); + } +} + +static void pci_unplug_disks(PCIBus *bus) +{ + pci_for_each_device(bus, 0, unplug_disks, NULL); +} + +static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val) +{ + PCIXenPlatformState *s = opaque; + + switch (addr) { + case 0: { + PCIDevice *pci_dev = PCI_DEVICE(s); + /* Unplug devices. Value is a bitmask of which devices to + unplug, with bit 0 the IDE devices, bit 1 the network + devices, and bit 2 the non-primary-master IDE devices. */ + if (val & UNPLUG_ALL_IDE_DISKS) { + DPRINTF("unplug disks\n"); + blk_drain_all(); + blk_flush_all(); + pci_unplug_disks(pci_dev->bus); + } + if (val & UNPLUG_ALL_NICS) { + DPRINTF("unplug nics\n"); + pci_unplug_nics(pci_dev->bus); + } + if (val & UNPLUG_AUX_IDE_DISKS) { + DPRINTF("unplug auxiliary disks not supported\n"); + } + break; + } + case 2: + switch (val) { + case 1: + DPRINTF("Citrix Windows PV drivers loaded in guest\n"); + break; + case 0: + DPRINTF("Guest claimed to be running PV product 0?\n"); + break; + default: + DPRINTF("Unknown PV product %d loaded in guest\n", val); + break; + } + s->driver_product_version = val; + break; + } +} + +static void platform_fixed_ioport_writel(void *opaque, uint32_t addr, + uint32_t val) +{ + switch (addr) { + case 0: + /* PV driver version */ + break; + } +} + +static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PCIXenPlatformState *s = opaque; + + switch (addr) { + case 0: /* Platform flags */ { + hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ? + HVMMEM_ram_ro : HVMMEM_ram_rw; + if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type, 0xc0, 0x40)) { + DPRINTF("unable to change ro/rw state of ROM memory area!\n"); + } else { + s->flags = val & PFFLAG_ROM_LOCK; + DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n", + (mem_type == HVMMEM_ram_ro ? "ro":"rw")); + } + break; + } + case 2: + log_writeb(s, val); + break; + } +} + +static uint32_t platform_fixed_ioport_readw(void *opaque, uint32_t addr) +{ + PCIXenPlatformState *s = opaque; + + switch (addr) { + case 0: + if (s->drivers_blacklisted) { + /* The drivers will recognise this magic number and refuse + * to do anything. */ + return 0xd249; + } else { + /* Magic value so that you can identify the interface. */ + return 0x49d2; + } + default: + return 0xffff; + } +} + +static uint32_t platform_fixed_ioport_readb(void *opaque, uint32_t addr) +{ + PCIXenPlatformState *s = opaque; + + switch (addr) { + case 0: + /* Platform flags */ + return s->flags; + case 2: + /* Version number */ + return 1; + default: + return 0xff; + } +} + +static void platform_fixed_ioport_reset(void *opaque) +{ + PCIXenPlatformState *s = opaque; + + platform_fixed_ioport_writeb(s, 0, 0); +} + +static uint64_t platform_fixed_ioport_read(void *opaque, + hwaddr addr, + unsigned size) +{ + switch (size) { + case 1: + return platform_fixed_ioport_readb(opaque, addr); + case 2: + return platform_fixed_ioport_readw(opaque, addr); + default: + return -1; + } +} + +static void platform_fixed_ioport_write(void *opaque, hwaddr addr, + + uint64_t val, unsigned size) +{ + switch (size) { + case 1: + platform_fixed_ioport_writeb(opaque, addr, val); + break; + case 2: + platform_fixed_ioport_writew(opaque, addr, val); + break; + case 4: + platform_fixed_ioport_writel(opaque, addr, val); + break; + } +} + + +static const MemoryRegionOps platform_fixed_io_ops = { + .read = platform_fixed_ioport_read, + .write = platform_fixed_ioport_write, + .valid = { + .unaligned = true, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + .unaligned = true, + }, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void platform_fixed_ioport_init(PCIXenPlatformState* s) +{ + memory_region_init_io(&s->fixed_io, OBJECT(s), &platform_fixed_io_ops, s, + "xen-fixed", 16); + memory_region_add_subregion(get_system_io(), XEN_PLATFORM_IOPORT, + &s->fixed_io); +} + +/* Xen Platform PCI Device */ + +static uint64_t xen_platform_ioport_readb(void *opaque, hwaddr addr, + unsigned int size) +{ + if (addr == 0) { + return platform_fixed_ioport_readb(opaque, 0); + } else { + return ~0u; + } +} + +static void xen_platform_ioport_writeb(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PCIXenPlatformState *s = opaque; + + switch (addr) { + case 0: /* Platform flags */ + platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val); + break; + case 8: + log_writeb(s, (uint32_t)val); + break; + default: + break; + } +} + +static const MemoryRegionOps xen_pci_io_ops = { + .read = xen_platform_ioport_readb, + .write = xen_platform_ioport_writeb, + .impl.min_access_size = 1, + .impl.max_access_size = 1, +}; + +static void platform_ioport_bar_setup(PCIXenPlatformState *d) +{ + memory_region_init_io(&d->bar, OBJECT(d), &xen_pci_io_ops, d, + "xen-pci", 0x100); +} + +static uint64_t platform_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + DPRINTF("Warning: attempted read from physical address " + "0x" TARGET_FMT_plx " in xen platform mmio space\n", addr); + + return 0; +} + +static void platform_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + DPRINTF("Warning: attempted write of 0x%"PRIx64" to physical " + "address 0x" TARGET_FMT_plx " in xen platform mmio space\n", + val, addr); +} + +static const MemoryRegionOps platform_mmio_handler = { + .read = &platform_mmio_read, + .write = &platform_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void platform_mmio_setup(PCIXenPlatformState *d) +{ + memory_region_init_io(&d->mmio_bar, OBJECT(d), &platform_mmio_handler, d, + "xen-mmio", 0x1000000); +} + +static int xen_platform_post_load(void *opaque, int version_id) +{ + PCIXenPlatformState *s = opaque; + + platform_fixed_ioport_writeb(s, 0, s->flags); + + return 0; +} + +static const VMStateDescription vmstate_xen_platform = { + .name = "platform", + .version_id = 4, + .minimum_version_id = 4, + .post_load = xen_platform_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PCIXenPlatformState), + VMSTATE_UINT8(flags, PCIXenPlatformState), + VMSTATE_END_OF_LIST() + } +}; + +static int xen_platform_initfn(PCIDevice *dev) +{ + PCIXenPlatformState *d = XEN_PLATFORM(dev); + uint8_t *pci_conf; + + pci_conf = dev->config; + + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + + pci_config_set_prog_interface(pci_conf, 0); + + pci_conf[PCI_INTERRUPT_PIN] = 1; + + platform_ioport_bar_setup(d); + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &d->bar); + + /* reserve 16MB mmio address for share memory*/ + platform_mmio_setup(d); + pci_register_bar(dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, + &d->mmio_bar); + + platform_fixed_ioport_init(d); + + return 0; +} + +static void platform_reset(DeviceState *dev) +{ + PCIXenPlatformState *s = XEN_PLATFORM(dev); + + platform_fixed_ioport_reset(s); +} + +static void xen_platform_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = xen_platform_initfn; + k->vendor_id = PCI_VENDOR_ID_XEN; + k->device_id = PCI_DEVICE_ID_XEN_PLATFORM; + k->class_id = PCI_CLASS_OTHERS << 8 | 0x80; + k->subsystem_vendor_id = PCI_VENDOR_ID_XEN; + k->subsystem_id = PCI_DEVICE_ID_XEN_PLATFORM; + k->revision = 1; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "XEN platform pci device"; + dc->reset = platform_reset; + dc->vmsd = &vmstate_xen_platform; +} + +static const TypeInfo xen_platform_info = { + .name = TYPE_XEN_PLATFORM, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIXenPlatformState), + .class_init = xen_platform_class_init, +}; + +static void xen_platform_register_types(void) +{ + type_register_static(&xen_platform_info); +} + +type_init(xen_platform_register_types) diff --git a/qemu/hw/i386/xen/xen_pvdevice.c b/qemu/hw/i386/xen/xen_pvdevice.c new file mode 100644 index 000000000..c2189473b --- /dev/null +++ b/qemu/hw/i386/xen/xen_pvdevice.c @@ -0,0 +1,135 @@ +/* Copyright (c) Citrix Systems Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, + * with or without modification, are permitted provided + * that the following conditions are met: + * + * * Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the + * following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "trace.h" + +#define TYPE_XEN_PV_DEVICE "xen-pvdevice" + +#define XEN_PV_DEVICE(obj) \ + OBJECT_CHECK(XenPVDevice, (obj), TYPE_XEN_PV_DEVICE) + +typedef struct XenPVDevice { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + uint16_t vendor_id; + uint16_t device_id; + uint8_t revision; + uint32_t size; + MemoryRegion mmio; +} XenPVDevice; + +static uint64_t xen_pv_mmio_read(void *opaque, hwaddr addr, + unsigned size) +{ + trace_xen_pv_mmio_read(addr); + + return ~(uint64_t)0; +} + +static void xen_pv_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + trace_xen_pv_mmio_write(addr); +} + +static const MemoryRegionOps xen_pv_mmio_ops = { + .read = &xen_pv_mmio_read, + .write = &xen_pv_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static int xen_pv_init(PCIDevice *pci_dev) +{ + XenPVDevice *d = XEN_PV_DEVICE(pci_dev); + uint8_t *pci_conf; + + /* device-id property must always be supplied */ + if (d->device_id == 0xffff) + return -1; + + pci_conf = pci_dev->config; + + pci_set_word(pci_conf + PCI_VENDOR_ID, d->vendor_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, d->vendor_id); + pci_set_word(pci_conf + PCI_DEVICE_ID, d->device_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, d->device_id); + pci_set_byte(pci_conf + PCI_REVISION_ID, d->revision); + + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_MEMORY); + + pci_config_set_prog_interface(pci_conf, 0); + + pci_conf[PCI_INTERRUPT_PIN] = 1; + + memory_region_init_io(&d->mmio, NULL, &xen_pv_mmio_ops, d, + "mmio", d->size); + + pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, + &d->mmio); + + return 0; +} + +static Property xen_pv_props[] = { + DEFINE_PROP_UINT16("vendor-id", XenPVDevice, vendor_id, PCI_VENDOR_ID_XEN), + DEFINE_PROP_UINT16("device-id", XenPVDevice, device_id, 0xffff), + DEFINE_PROP_UINT8("revision", XenPVDevice, revision, 0x01), + DEFINE_PROP_UINT32("size", XenPVDevice, size, 0x400000), + DEFINE_PROP_END_OF_LIST() +}; + +static void xen_pv_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = xen_pv_init; + k->class_id = PCI_CLASS_SYSTEM_OTHER; + dc->desc = "Xen PV Device"; + dc->props = xen_pv_props; +} + +static const TypeInfo xen_pv_type_info = { + .name = TYPE_XEN_PV_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(XenPVDevice), + .class_init = xen_pv_class_init, +}; + +static void xen_pv_register_types(void) +{ + type_register_static(&xen_pv_type_info); +} + +type_init(xen_pv_register_types) |