diff options
Diffstat (limited to 'qemu/hw/ppc/spapr.c')
-rw-r--r-- | qemu/hw/ppc/spapr.c | 1087 |
1 files changed, 787 insertions, 300 deletions
diff --git a/qemu/hw/ppc/spapr.c b/qemu/hw/ppc/spapr.c index a6f19473c..b69995e0d 100644 --- a/qemu/hw/ppc/spapr.c +++ b/qemu/hw/ppc/spapr.c @@ -24,15 +24,19 @@ * THE SOFTWARE. * */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "sysemu/sysemu.h" #include "sysemu/numa.h" #include "hw/hw.h" #include "hw/fw-path-provider.h" #include "elf.h" #include "net/net.h" +#include "sysemu/device_tree.h" #include "sysemu/block-backend.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" +#include "sysemu/device_tree.h" #include "kvm_ppc.h" #include "migration/migration.h" #include "mmu-hash64.h" @@ -60,6 +64,7 @@ #include "hw/nmi.h" #include "hw/compat.h" +#include "qemu/cutils.h" #include <libfdt.h> @@ -73,7 +78,7 @@ * * We load our kernel at 4M, leaving space for SLOF initial image */ -#define FDT_MAX_SIZE 0x40000 +#define FDT_MAX_SIZE 0x100000 #define RTAS_MAX_SIZE 0x10000 #define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */ #define FW_MAX_SIZE 0x400000 @@ -85,8 +90,6 @@ #define TIMEBASE_FREQ 512000000ULL -#define MAX_CPUS 255 - #define PHANDLE_XICP 0x00001111 #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) @@ -110,7 +113,7 @@ static XICSState *try_create_xics(const char *type, int nr_servers, } static XICSState *xics_system_init(MachineState *machine, - int nr_servers, int nr_irqs) + int nr_servers, int nr_irqs, Error **errp) { XICSState *icp = NULL; @@ -121,13 +124,15 @@ static XICSState *xics_system_init(MachineState *machine, icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err); } if (machine_kernel_irqchip_required(machine) && !icp) { - error_report("kernel_irqchip requested but unavailable: %s", - error_get_pretty(err)); + error_reportf_err(err, + "kernel_irqchip requested but unavailable: "); + } else { + error_free(err); } } if (!icp) { - icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort); + icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, errp); } return icp; @@ -373,8 +378,16 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, qemu_uuid[14], qemu_uuid[15]); _FDT((fdt_property_string(fdt, "vm,uuid", buf))); + if (qemu_uuid_set) { + _FDT((fdt_property_string(fdt, "system-id", buf))); + } g_free(buf); + if (qemu_get_vm_name()) { + _FDT((fdt_property_string(fdt, "ibm,partition-name", + qemu_get_vm_name()))); + } + _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); @@ -427,6 +440,10 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", RTAS_EVENT_SCAN_RATE))); + if (msi_nonbroken) { + _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0))); + } + /* * According to PAPR, rtas ibm,os-term does not guarantee a return * back to the guest cpu. @@ -481,10 +498,11 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, * Older KVM versions with older guest kernels were broken with the * magic page, don't allow the guest to map it. */ - kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, - sizeof(hypercall)); - _FDT((fdt_property(fdt, "hcall-instructions", hypercall, - sizeof(hypercall)))); + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT((fdt_property(fdt, "hcall-instructions", hypercall, + sizeof(hypercall)))); + } } _FDT((fdt_end_node(fdt))); } @@ -495,44 +513,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, return fdt; } -int spapr_h_cas_compose_response(sPAPRMachineState *spapr, - target_ulong addr, target_ulong size) -{ - void *fdt, *fdt_skel; - sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; - - size -= sizeof(hdr); - - /* Create sceleton */ - fdt_skel = g_malloc0(size); - _FDT((fdt_create(fdt_skel, size))); - _FDT((fdt_begin_node(fdt_skel, ""))); - _FDT((fdt_end_node(fdt_skel))); - _FDT((fdt_finish(fdt_skel))); - fdt = g_malloc0(size); - _FDT((fdt_open_into(fdt_skel, fdt, size))); - g_free(fdt_skel); - - /* Fix skeleton up */ - _FDT((spapr_fixup_cpu_dt(fdt, spapr))); - - /* Pack resulting tree */ - _FDT((fdt_pack(fdt))); - - if (fdt_totalsize(fdt) + sizeof(hdr) > size) { - trace_spapr_cas_failed(size); - return -1; - } - - cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); - cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); - trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); - g_free(fdt); - - return 0; -} - -static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, +static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, hwaddr size) { uint32_t associativity[] = { @@ -555,6 +536,7 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, sizeof(mem_reg_property)))); _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, sizeof(associativity)))); + return off; } static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) @@ -620,11 +602,27 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; uint32_t page_sizes_prop[64]; size_t page_sizes_prop_size; - QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); - unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + uint32_t vcpus_per_socket = smp_threads * smp_cores; uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + /* Note: we keep CI large pages off for now because a 64K capable guest + * provisioned with large pages might otherwise try to map a qemu + * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages + * even if that qemu runs on a 4k host. + * + * We can later add this bit back when we are confident this is not + * an issue (!HV KVM or 64K host) + */ + uint8_t pa_features_206[] = { 6, 0, + 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + uint8_t *pa_features; + size_t pa_size; + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); @@ -653,6 +651,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr))); _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); @@ -690,8 +689,21 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, page_sizes_prop, page_sizes_prop_size))); } + /* Do the ibm,pa-features property, adjust it for ci-large-pages */ + if (env->mmu_model == POWERPC_MMU_2_06) { + pa_features = pa_features_206; + pa_size = sizeof(pa_features_206); + } else /* env->mmu_model == POWERPC_MMU_2_07 */ { + pa_features = pa_features_207; + pa_size = sizeof(pa_features_207); + } + if (env->ci_large_pages) { + pa_features[3] |= 0x20; + } + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", - cs->cpu_index / cpus_per_socket))); + cs->cpu_index / vcpus_per_socket))); _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)))); @@ -738,12 +750,162 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) } +/* + * Adds ibm,dynamic-reconfiguration-memory node. + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * of this device tree node. + */ +static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int ret, i, offset; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; + uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; + uint32_t *int_buf, *cur_index, buf_len; + int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; + + /* + * Don't create the node if there are no DR LMBs. + */ + if (!nr_lmbs) { + return 0; + } + + /* + * Allocate enough buffer size to fit in ibm,dynamic-memory + * or ibm,associativity-lookup-arrays + */ + buf_len = MAX(nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1, nr_nodes * 4 + 2) + * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); + + ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size, + sizeof(prop_lmb_size)); + if (ret < 0) { + goto out; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); + if (ret < 0) { + goto out; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0); + if (ret < 0) { + goto out; + } + + /* ibm,dynamic-memory */ + int_buf[0] = cpu_to_be32(nr_lmbs); + cur_index++; + for (i = 0; i < nr_lmbs; i++) { + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;; + uint32_t *dynamic_memory = cur_index; + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/lmb_size); + g_assert(drc); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); + if (addr < machine->ram_size || + memory_region_present(get_system_memory(), addr)) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } else { + dynamic_memory[5] = cpu_to_be32(0); + } + + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; + } + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); + if (ret < 0) { + goto out; + } + + /* ibm,associativity-lookup-arrays */ + cur_index = int_buf; + int_buf[0] = cpu_to_be32(nr_nodes); + int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */ + cur_index += 2; + for (i = 0; i < nr_nodes; i++) { + uint32_t associativity[] = { + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(i) + }; + memcpy(cur_index, associativity, sizeof(associativity)); + cur_index += 4; + } + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, + (cur_index - int_buf) * sizeof(uint32_t)); +out: + g_free(int_buf); + return ret; +} + +int spapr_h_cas_compose_response(sPAPRMachineState *spapr, + target_ulong addr, target_ulong size, + bool cpu_update, bool memory_update) +{ + void *fdt, *fdt_skel; + sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); + + size -= sizeof(hdr); + + /* Create sceleton */ + fdt_skel = g_malloc0(size); + _FDT((fdt_create(fdt_skel, size))); + _FDT((fdt_begin_node(fdt_skel, ""))); + _FDT((fdt_end_node(fdt_skel))); + _FDT((fdt_finish(fdt_skel))); + fdt = g_malloc0(size); + _FDT((fdt_open_into(fdt_skel, fdt, size))); + g_free(fdt_skel); + + /* Fixup cpu nodes */ + if (cpu_update) { + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); + } + + /* Generate ibm,dynamic-reconfiguration-memory node if required */ + if (memory_update && smc->dr_lmb_enabled) { + _FDT((spapr_populate_drconf_memory(spapr, fdt))); + } + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + if (fdt_totalsize(fdt) + sizeof(hdr) > size) { + trace_spapr_cas_failed(size); + return -1; + } + + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); + g_free(fdt); + + return 0; +} + static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr fdt_addr, hwaddr rtas_addr, hwaddr rtas_size) { MachineState *machine = MACHINE(qdev_get_machine()); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *boot_device = machine->boot_order; int ret, i; size_t cb = 0; @@ -768,13 +930,20 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, exit(1); } - QLIST_FOREACH(phb, &spapr->phbs, list) { - ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { + ret = spapr_rng_populate_dt(fdt); + if (ret < 0) { + fprintf(stderr, "could not set up rng device in the fdt\n"); + exit(1); + } } - if (ret < 0) { - fprintf(stderr, "couldn't setup PCI devices in fdt\n"); - exit(1); + QLIST_FOREACH(phb, &spapr->phbs, list) { + ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); + if (ret < 0) { + error_report("couldn't setup PCI devices in fdt"); + exit(1); + } } /* RTAS */ @@ -814,6 +983,10 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, spapr_populate_chosen_stdout(fdt, spapr->vio_bus); } + if (smc->dr_lmb_enabled) { + _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); + } + _FDT((fdt_pack(fdt))); if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { @@ -822,6 +995,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, exit(1); } + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); g_free(bootlist); @@ -851,45 +1025,93 @@ static void emulate_spapr_hypercall(PowerPCCPU *cpu) #define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) #define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) -static void spapr_reset_htab(sPAPRMachineState *spapr) +/* + * Get the fd to access the kernel htab, re-opening it if necessary + */ +static int get_htab_fd(sPAPRMachineState *spapr) { - long shift; - int index; + if (spapr->htab_fd >= 0) { + return spapr->htab_fd; + } + + spapr->htab_fd = kvmppc_get_htab_fd(false); + if (spapr->htab_fd < 0) { + error_report("Unable to open fd for reading hash table from KVM: %s", + strerror(errno)); + } - /* allocate hash page table. For now we always make this 16mb, - * later we should probably make it scale to the size of guest - * RAM */ + return spapr->htab_fd; +} - shift = kvmppc_reset_htab(spapr->htab_shift); +static void close_htab_fd(sPAPRMachineState *spapr) +{ + if (spapr->htab_fd >= 0) { + close(spapr->htab_fd); + } + spapr->htab_fd = -1; +} - if (shift > 0) { - /* Kernel handles htab, we don't need to allocate one */ - spapr->htab_shift = shift; - kvmppc_kern_htab = true; +static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) +{ + int shift; + + /* We aim for a hash table of size 1/128 the size of RAM (rounded + * up). The PAPR recommendation is actually 1/64 of RAM size, but + * that's much more than is needed for Linux guests */ + shift = ctz64(pow2ceil(ramsize)) - 7; + shift = MAX(shift, 18); /* Minimum architected size */ + shift = MIN(shift, 46); /* Maximum architected size */ + return shift; +} - /* Tell readers to update their file descriptor */ - if (spapr->htab_fd >= 0) { - spapr->htab_fd_stale = true; +static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, + Error **errp) +{ + long rc; + + /* Clean up any HPT info from a previous boot */ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); + + rc = kvmppc_reset_htab(shift); + if (rc < 0) { + /* kernel-side HPT needed, but couldn't allocate one */ + error_setg_errno(errp, errno, + "Failed to allocate KVM HPT of order %d (try smaller maxmem?)", + shift); + /* This is almost certainly fatal, but if the caller really + * wants to carry on with shift == 0, it's welcome to try */ + } else if (rc > 0) { + /* kernel-side HPT allocated */ + if (rc != shift) { + error_setg(errp, + "Requested order %d HPT, but kernel allocated order %ld (try smaller maxmem?)", + shift, rc); } + + spapr->htab_shift = shift; + spapr->htab = NULL; } else { + /* kernel-side HPT not needed, allocate in userspace instead */ + size_t size = 1ULL << shift; + int i; + + spapr->htab = qemu_memalign(size, size); if (!spapr->htab) { - /* Allocate an htab if we don't yet have one */ - spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); + error_setg_errno(errp, errno, + "Could not allocate HPT of order %d", shift); + return; } - /* And clear it */ - memset(spapr->htab, 0, HTAB_SIZE(spapr)); + memset(spapr->htab, 0, size); + spapr->htab_shift = shift; - for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) { - DIRTY_HPTE(HPTE(spapr->htab, index)); + for (i = 0; i < size / HASH_PTE_SIZE_64; i++) { + DIRTY_HPTE(HPTE(spapr->htab, i)); } } - - /* Update the RMA size if necessary */ - if (spapr->vrma_adjust) { - spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), - spapr->htab_shift); - } } static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) @@ -909,39 +1131,26 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) return 0; } -/* - * A guest reset will cause spapr->htab_fd to become stale if being used. - * Reopen the file descriptor to make sure the whole HTAB is properly read. - */ -static int spapr_check_htab_fd(sPAPRMachineState *spapr) -{ - int rc = 0; - - if (spapr->htab_fd_stale) { - close(spapr->htab_fd); - spapr->htab_fd = kvmppc_get_htab_fd(false); - if (spapr->htab_fd < 0) { - error_report("Unable to open fd for reading hash table from KVM: " - "%s", strerror(errno)); - rc = -1; - } - spapr->htab_fd_stale = false; - } - - return rc; -} - static void ppc_spapr_reset(void) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + MachineState *machine = MACHINE(qdev_get_machine()); + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; /* Check for unknown sysbus devices */ foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); - /* Reset the hash table & recalc the RMA */ - spapr_reset_htab(spapr); + /* Allocate and/or reset the hash page table */ + spapr_reallocate_hpt(spapr, + spapr_hpt_shift_for_ramsize(machine->maxram_size), + &error_fatal); + + /* Update the RMA size if necessary */ + if (spapr->vrma_adjust) { + spapr->rma_size = kvmppc_rma_size(spapr_node0_size(), + spapr->htab_shift); + } qemu_devices_reset(); @@ -987,24 +1196,8 @@ static void spapr_cpu_reset(void *opaque) env->spr[SPR_HIOR] = 0; - env->external_htab = (uint8_t *)spapr->htab; - if (kvm_enabled() && !env->external_htab) { - /* - * HV KVM, set external_htab to 1 so our ppc_hash64_load_hpte* - * functions do the right thing. - */ - env->external_htab = (void *)1; - } - env->htab_base = -1; - /* - * htab_mask is the mask used to normalize hash value to PTEG index. - * htab_shift is log2 of hash table size. - * We have 8 hpte per group, and each hpte is 16 bytes. - * ie have 128 bytes per hpte entry. - */ - env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1; - env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab | - (spapr->htab_shift - 18); + ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, + &error_fatal); } static void spapr_create_nvram(sPAPRMachineState *spapr) @@ -1013,7 +1206,8 @@ static void spapr_create_nvram(sPAPRMachineState *spapr) DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); if (dinfo) { - qdev_prop_set_drive_nofail(dev, "drive", blk_by_legacy_dinfo(dinfo)); + qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo), + &error_fatal); } qdev_init_nofail(dev); @@ -1033,7 +1227,7 @@ static void spapr_rtc_create(sPAPRMachineState *spapr) } /* Returns whether we want to use VGA or not */ -static int spapr_vga_init(PCIBus *pci_bus) +static bool spapr_vga_init(PCIBus *pci_bus, Error **errp) { switch (vga_interface_type) { case VGA_NONE: @@ -1041,11 +1235,12 @@ static int spapr_vga_init(PCIBus *pci_bus) case VGA_DEVICE: return true; case VGA_STD: + case VGA_VIRTIO: return pci_vga_init(pci_bus) != NULL; default: - fprintf(stderr, "This vga model is not supported," - "currently it only supports -vga std\n"); - exit(0); + error_setg(errp, + "Unsupported VGA mode, only -vga std or -vga virtio is supported"); + return false; } } @@ -1099,14 +1294,6 @@ static int htab_save_setup(QEMUFile *f, void *opaque) spapr->htab_first_pass = true; } else { assert(kvm_enabled()); - - spapr->htab_fd = kvmppc_get_htab_fd(false); - spapr->htab_fd_stale = false; - if (spapr->htab_fd < 0) { - fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n", - strerror(errno)); - return -1; - } } @@ -1116,6 +1303,7 @@ static int htab_save_setup(QEMUFile *f, void *opaque) static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, int64_t max_ns) { + bool has_timeout = max_ns != -1; int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; int index = spapr->htab_save_index; int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); @@ -1149,7 +1337,8 @@ static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), HASH_PTE_SIZE_64 * n_valid); - if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + if (has_timeout && + (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { break; } } @@ -1246,6 +1435,7 @@ static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr, static int htab_save_iterate(QEMUFile *f, void *opaque) { sPAPRMachineState *spapr = opaque; + int fd; int rc = 0; /* Iteration header */ @@ -1254,13 +1444,12 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) if (!spapr->htab) { assert(kvm_enabled()); - rc = spapr_check_htab_fd(spapr); - if (rc < 0) { - return rc; + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; } - rc = kvmppc_save_htab(f, spapr->htab_fd, - MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); if (rc < 0) { return rc; } @@ -1281,6 +1470,7 @@ static int htab_save_iterate(QEMUFile *f, void *opaque) static int htab_save_complete(QEMUFile *f, void *opaque) { sPAPRMachineState *spapr = opaque; + int fd; /* Iteration header */ qemu_put_be32(f, 0); @@ -1290,18 +1480,20 @@ static int htab_save_complete(QEMUFile *f, void *opaque) assert(kvm_enabled()); - rc = spapr_check_htab_fd(spapr); - if (rc < 0) { - return rc; + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; } - rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1); + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1); if (rc < 0) { return rc; } - close(spapr->htab_fd); - spapr->htab_fd = -1; + close_htab_fd(spapr); } else { + if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, -1); + } htab_save_later_pass(f, spapr, -1); } @@ -1320,15 +1512,19 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) int fd = -1; if (version_id < 1 || version_id > 1) { - fprintf(stderr, "htab_load() bad version\n"); + error_report("htab_load() bad version"); return -EINVAL; } section_hdr = qemu_get_be32(f); if (section_hdr) { - /* First section, just the hash shift */ - if (spapr->htab_shift != section_hdr) { + Error *local_err = NULL; + + /* First section gives the htab size */ + spapr_reallocate_hpt(spapr, section_hdr, &local_err); + if (local_err) { + error_report_err(local_err); return -EINVAL; } return 0; @@ -1339,8 +1535,8 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) fd = kvmppc_get_htab_fd(true); if (fd < 0) { - fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n", - strerror(errno)); + error_report("Unable to open fd to restore KVM hash table: %s", + strerror(errno)); } } @@ -1360,9 +1556,9 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) if ((index + n_valid + n_invalid) > (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) { /* Bad index in stream */ - fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) " - "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid, - spapr->htab_shift); + error_report( + "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)", + index, n_valid, n_invalid, spapr->htab_shift); return -EINVAL; } @@ -1398,7 +1594,7 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) static SaveVMHandlers savevm_htab_handlers = { .save_live_setup = htab_save_setup, .save_live_iterate = htab_save_iterate, - .save_live_complete = htab_save_complete, + .save_live_complete_precopy = htab_save_complete, .load_state = htab_load, }; @@ -1409,26 +1605,24 @@ static void spapr_boot_set(void *opaque, const char *boot_device, machine->boot_order = g_strdup(boot_device); } -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) +static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, + Error **errp) { CPUPPCState *env = &cpu->env; /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); - /* PAPR always has exception vectors in RAM not ROM. To ensure this, - * MSR[IP] should never be set. - */ - env->msr_mask &= ~(1 << 6); - - /* Tell KVM that we're in PAPR mode */ - if (kvm_enabled()) { - kvmppc_set_papr(cpu); - } + /* Enable PAPR mode in TCG or KVM */ + cpu_ppc_set_papr(cpu); if (cpu->max_compat) { - if (ppc_set_compat(cpu, cpu->max_compat) < 0) { - exit(1); + Error *local_err = NULL; + + ppc_set_compat(cpu, cpu->max_compat, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1437,10 +1631,84 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) qemu_register_reset(spapr_cpu_reset, cpu); } +/* + * Reset routine for LMB DR devices. + * + * Unlike PCI DR devices, LMB DR devices explicitly register this reset + * routine. Reset for PCI DR devices will be handled by PHB reset routine + * when it walks all its children devices. LMB devices reset occurs + * as part of spapr_ppc_reset(). + */ +static void spapr_drc_reset(void *opaque) +{ + sPAPRDRConnector *drc = opaque; + DeviceState *d = DEVICE(drc); + + if (d) { + device_reset(d); + } +} + +static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr) +{ + MachineState *machine = MACHINE(spapr); + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; + int i; + + for (i = 0; i < nr_lmbs; i++) { + sPAPRDRConnector *drc; + uint64_t addr; + + addr = i * lmb_size + spapr->hotplug_memory.base; + drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/lmb_size); + qemu_register_reset(spapr_drc_reset, drc); + } +} + +/* + * If RAM size, maxmem size and individual node mem sizes aren't aligned + * to SPAPR_MEMORY_BLOCK_SIZE(256MB), then refuse to start the guest + * since we can't support such unaligned sizes with DRCONF_MEMORY. + */ +static void spapr_validate_node_memory(MachineState *machine, Error **errp) +{ + int i; + + if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Memory size 0x" RAM_ADDR_FMT + " is not aligned to %llu MiB", + machine->ram_size, + SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT + " is not aligned to %llu MiB", + machine->ram_size, + SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, + "Node %d memory size 0x%" PRIx64 + " is not aligned to %llu MiB", + i, numa_info[i].node_mem, + SPAPR_MEMORY_BLOCK_SIZE / M_BYTE); + return; + } + } +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(MachineState *machine) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -1459,7 +1727,7 @@ static void ppc_spapr_init(MachineState *machine) bool kernel_le = false; char *filename; - msi_supported = true; + msi_nonbroken = true; QLIST_INIT(&spapr->phbs); @@ -1494,30 +1762,23 @@ static void ppc_spapr_init(MachineState *machine) } if (spapr->rma_size > node0_size) { - fprintf(stderr, "Error: Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")\n", - spapr->rma_size); + error_report("Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")", + spapr->rma_size); exit(1); } /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; - /* We aim for a hash table of size 1/128 the size of RAM. The - * normal rule of thumb is 1/64 the size of RAM, but that's much - * more than needed for the Linux guests we support. */ - spapr->htab_shift = 18; /* Minimum architected size */ - while (spapr->htab_shift <= 46) { - if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) { - break; - } - spapr->htab_shift++; - } - /* Set up Interrupt Controller before we create the VCPUs */ spapr->icp = xics_system_init(machine, DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), smp_threads), - XICS_IRQS); + XICS_IRQS, &error_fatal); + + if (smc->dr_lmb_enabled) { + spapr_validate_node_memory(machine, &error_fatal); + } /* init CPUs */ if (machine->cpu_model == NULL) { @@ -1526,15 +1787,16 @@ static void ppc_spapr_init(MachineState *machine) for (i = 0; i < smp_cpus; i++) { cpu = cpu_ppc_init(machine->cpu_model); if (cpu == NULL) { - fprintf(stderr, "Unable to find PowerPC CPU definition\n"); + error_report("Unable to find PowerPC CPU definition"); exit(1); } - spapr_cpu_init(spapr, cpu); + spapr_cpu_init(spapr, cpu, &error_fatal); } if (kvm_enabled()) { /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ kvmppc_enable_logical_ci_hcalls(); + kvmppc_enable_set_mode_hcall(); } /* allocate RAM */ @@ -1550,6 +1812,29 @@ static void ppc_spapr_init(MachineState *machine) memory_region_add_subregion(sysmem, 0, rma_region); } + /* initialize hotplug memory address space */ + if (machine->ram_size < machine->maxram_size) { + ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size; + + if (machine->ram_slots > SPAPR_MAX_RAM_SLOTS) { + error_report("Specified number of memory slots %" + PRIu64" exceeds max supported %d", + machine->ram_slots, SPAPR_MAX_RAM_SLOTS); + exit(1); + } + + spapr->hotplug_memory.base = ROUND_UP(machine->ram_size, + SPAPR_HOTPLUG_MEM_ALIGN); + memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr), + "hotplug-memory", hotplug_mem_size); + memory_region_add_subregion(sysmem, spapr->hotplug_memory.base, + &spapr->hotplug_memory.mr); + } + + if (smc->dr_lmb_enabled) { + spapr_create_lmb_dr_connectors(spapr); + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); if (!filename) { error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin"); @@ -1610,13 +1895,17 @@ static void ppc_spapr_init(MachineState *machine) } /* Graphics */ - if (spapr_vga_init(phb->bus)) { + if (spapr_vga_init(phb->bus, &error_fatal)) { spapr->has_graphics = true; machine->usb |= defaults_enabled() && !machine->usb_disabled; } if (machine->usb) { - pci_create_simple(phb->bus, -1, "pci-ohci"); + if (smc->use_ohci_by_default) { + pci_create_simple(phb->bus, -1, "pci-ohci"); + } else { + pci_create_simple(phb->bus, -1, "nec-usb-xhci"); + } if (spapr->has_graphics) { USBBus *usb_bus = usb_bus_find(-1); @@ -1627,8 +1916,9 @@ static void ppc_spapr_init(MachineState *machine) } if (spapr->rma_size < (MIN_RMA_SLOF << 20)) { - fprintf(stderr, "qemu: pSeries SLOF firmware requires >= " - "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF); + error_report( + "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)", + MIN_RMA_SLOF); exit(1); } @@ -1636,16 +1926,18 @@ static void ppc_spapr_init(MachineState *machine) uint64_t lowaddr = 0; kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0); + NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE, + 0, 0); if (kernel_size == ELF_LOAD_WRONG_ENDIAN) { kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0); + NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE, + 0, 0); kernel_le = kernel_size > 0; } if (kernel_size < 0) { - fprintf(stderr, "qemu: error loading %s: %s\n", - kernel_filename, load_elf_strerror(kernel_size)); + error_report("error loading %s: %s", + kernel_filename, load_elf_strerror(kernel_size)); exit(1); } @@ -1658,8 +1950,8 @@ static void ppc_spapr_init(MachineState *machine) initrd_size = load_image_targphys(initrd_filename, initrd_base, load_limit - initrd_base); if (initrd_size < 0) { - fprintf(stderr, "qemu: could not load initial ram disk '%s'\n", - initrd_filename); + error_report("could not load initial ram disk '%s'", + initrd_filename); exit(1); } } else { @@ -1796,6 +2088,9 @@ static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) static void spapr_machine_initfn(Object *obj) { + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + spapr->htab_fd = -1; object_property_add_str(obj, "kvm-type", spapr_get_kvm_type, spapr_set_kvm_type, NULL); object_property_set_description(obj, "kvm-type", @@ -1803,6 +2098,13 @@ static void spapr_machine_initfn(Object *obj) NULL); } +static void spapr_machine_finalizefn(Object *obj) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->kvm_type); +} + static void ppc_cpu_do_nmi_on_cpu(void *arg) { CPUState *cs = arg; @@ -1820,22 +2122,177 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) } } +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, + uint32_t node, Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; + int i, fdt_offset, fdt_size; + void *fdt; + + /* + * Check for DRC connectors and send hotplug notification to the + * guest only in case of hotplugged memory. This allows cold plugged + * memory to be specified at boot time. + */ + if (!dev->hotplugged) { + return; + } + + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + fdt = create_device_tree(&fdt_size); + fdt_offset = spapr_populate_memory_node(fdt, node, addr, + SPAPR_MEMORY_BLOCK_SIZE); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); +} + +static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + uint32_t node, Error **errp) +{ + Error *local_err = NULL; + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t align = memory_region_get_alignment(mr); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(&local_err, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); + goto out; + } + + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); + if (local_err) { + goto out; + } + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + goto out; + } + + spapr_add_lmbs(dev, addr, size, node, &error_abort); + +out: + error_propagate(errp, local_err); +} + +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + int node; + + if (!smc->dr_lmb_enabled) { + error_setg(errp, "Memory hotplug not supported for this machine"); + return; + } + node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, errp); + if (*errp) { + return; + } + if (node < 0 || node >= MAX_NODES) { + error_setg(errp, "Invaild node %d", node); + return; + } + + /* + * Currently PowerPC kernel doesn't allow hot-adding memory to + * memory-less node, but instead will silently add the memory + * to the first node that has some memory. This causes two + * unexpected behaviours for the user. + * + * - Memory gets hotplugged to a different node than what the user + * specified. + * - Since pc-dimm subsystem in QEMU still thinks that memory belongs + * to memory-less node, a reboot will set things accordingly + * and the previously hotplugged memory now ends in the right node. + * This appears as if some memory moved from one node to another. + * + * So until kernel starts supporting memory hotplug to memory-less + * nodes, just prevent such attempts upfront in QEMU. + */ + if (nb_numa_nodes && !numa_info[node].node_mem) { + error_setg(errp, "Can't hotplug memory to memory-less node %d", + node); + return; + } + + spapr_memory_plug(hotplug_dev, dev, node, errp); + } +} + +static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + error_setg(errp, "Memory hot unplug not supported by sPAPR"); + } +} + +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +} + +static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) +{ + /* Allocate to NUMA nodes on a "socket" basis (not that concept of + * socket means much for the paravirtualized PAPR platform) */ + return cpu_index / smp_threads / smp_cores; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + mc->desc = "pSeries Logical Partition (PAPR compliant)"; + /* + * We set up the default / latest behaviour here. The class_init + * functions for the specific versioned machine types can override + * these details for backwards compatibility + */ mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = MAX_CPUS; + mc->max_cpus = MAX_CPUMASK_BITS; mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * M_BYTE; mc->kvm_type = spapr_kvm_type; mc->has_dynamic_sysbus = true; + mc->pci_allow_0_address = true; + mc->get_hotplug_handler = spapr_get_hotpug_handler; + hc->plug = spapr_machine_device_plug; + hc->unplug = spapr_machine_device_unplug; + mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + smc->dr_lmb_enabled = true; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; } @@ -1846,153 +2303,183 @@ static const TypeInfo spapr_machine_info = { .abstract = true, .instance_size = sizeof(sPAPRMachineState), .instance_init = spapr_machine_initfn, + .instance_finalize = spapr_machine_finalizefn, .class_size = sizeof(sPAPRMachineClass), .class_init = spapr_machine_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER }, { } }, }; -#define SPAPR_COMPAT_2_3 \ - HW_COMPAT_2_3 \ - {\ - .driver = "spapr-pci-host-bridge",\ - .property = "dynamic-reconfiguration",\ - .value = "off",\ - }, - -#define SPAPR_COMPAT_2_2 \ - SPAPR_COMPAT_2_3 \ - HW_COMPAT_2_2 \ - {\ - .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ - .property = "mem_win_size",\ - .value = "0x20000000",\ - }, - -#define SPAPR_COMPAT_2_1 \ - SPAPR_COMPAT_2_2 \ - HW_COMPAT_2_1 +#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \ + static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + spapr_machine_##suffix##_class_options(mc); \ + if (latest) { \ + mc->alias = "pseries"; \ + mc->is_default = 1; \ + } \ + } \ + static void spapr_machine_##suffix##_instance_init(Object *obj) \ + { \ + MachineState *machine = MACHINE(obj); \ + spapr_machine_##suffix##_instance_options(machine); \ + } \ + static const TypeInfo spapr_machine_##suffix##_info = { \ + .name = MACHINE_TYPE_NAME("pseries-" verstr), \ + .parent = TYPE_SPAPR_MACHINE, \ + .class_init = spapr_machine_##suffix##_class_init, \ + .instance_init = spapr_machine_##suffix##_instance_init, \ + }; \ + static void spapr_machine_register_##suffix(void) \ + { \ + type_register(&spapr_machine_##suffix##_info); \ + } \ + type_init(spapr_machine_register_##suffix) -static void spapr_compat_2_3(Object *obj) +/* + * pseries-2.6 + */ +static void spapr_machine_2_6_instance_options(MachineState *machine) { - savevm_skip_section_footers(); - global_state_set_optional(); } -static void spapr_compat_2_2(Object *obj) +static void spapr_machine_2_6_class_options(MachineClass *mc) { - spapr_compat_2_3(obj); + /* Defaults for the latest behaviour inherited from the base class */ } -static void spapr_compat_2_1(Object *obj) -{ - spapr_compat_2_2(obj); -} +DEFINE_SPAPR_MACHINE(2_6, "2.6", true); + +/* + * pseries-2.5 + */ +#define SPAPR_COMPAT_2_5 \ + HW_COMPAT_2_5 \ + { \ + .driver = "spapr-vlan", \ + .property = "use-rx-buffer-pools", \ + .value = "off", \ + }, -static void spapr_machine_2_3_instance_init(Object *obj) +static void spapr_machine_2_5_instance_options(MachineState *machine) { - spapr_compat_2_3(obj); - spapr_machine_initfn(obj); } -static void spapr_machine_2_2_instance_init(Object *obj) +static void spapr_machine_2_5_class_options(MachineClass *mc) { - spapr_compat_2_2(obj); - spapr_machine_initfn(obj); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_2_6_class_options(mc); + smc->use_ohci_by_default = true; + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5); } -static void spapr_machine_2_1_instance_init(Object *obj) +DEFINE_SPAPR_MACHINE(2_5, "2.5", false); + +/* + * pseries-2.4 + */ +#define SPAPR_COMPAT_2_4 \ + SPAPR_COMPAT_2_5 \ + HW_COMPAT_2_4 + +static void spapr_machine_2_4_instance_options(MachineState *machine) { - spapr_compat_2_1(obj); - spapr_machine_initfn(obj); + spapr_machine_2_5_instance_options(machine); } -static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data) +static void spapr_machine_2_4_class_options(MachineClass *mc) { - MachineClass *mc = MACHINE_CLASS(oc); - static GlobalProperty compat_props[] = { - SPAPR_COMPAT_2_1 - { /* end of list */ } - }; + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - mc->name = "pseries-2.1"; - mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1"; - mc->compat_props = compat_props; + spapr_machine_2_5_class_options(mc); + smc->dr_lmb_enabled = false; + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_4); } -static const TypeInfo spapr_machine_2_1_info = { - .name = TYPE_SPAPR_MACHINE "2.1", - .parent = TYPE_SPAPR_MACHINE, - .class_init = spapr_machine_2_1_class_init, - .instance_init = spapr_machine_2_1_instance_init, -}; +DEFINE_SPAPR_MACHINE(2_4, "2.4", false); + +/* + * pseries-2.3 + */ +#define SPAPR_COMPAT_2_3 \ + SPAPR_COMPAT_2_4 \ + HW_COMPAT_2_3 \ + {\ + .driver = "spapr-pci-host-bridge",\ + .property = "dynamic-reconfiguration",\ + .value = "off",\ + }, -static void spapr_machine_2_2_class_init(ObjectClass *oc, void *data) +static void spapr_machine_2_3_instance_options(MachineState *machine) { - static GlobalProperty compat_props[] = { - SPAPR_COMPAT_2_2 - { /* end of list */ } - }; - MachineClass *mc = MACHINE_CLASS(oc); + spapr_machine_2_4_instance_options(machine); + savevm_skip_section_footers(); + global_state_set_optional(); + savevm_skip_configuration(); +} - mc->name = "pseries-2.2"; - mc->desc = "pSeries Logical Partition (PAPR compliant) v2.2"; - mc->compat_props = compat_props; +static void spapr_machine_2_3_class_options(MachineClass *mc) +{ + spapr_machine_2_4_class_options(mc); + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_3); } +DEFINE_SPAPR_MACHINE(2_3, "2.3", false); -static const TypeInfo spapr_machine_2_2_info = { - .name = TYPE_SPAPR_MACHINE "2.2", - .parent = TYPE_SPAPR_MACHINE, - .class_init = spapr_machine_2_2_class_init, - .instance_init = spapr_machine_2_2_instance_init, -}; +/* + * pseries-2.2 + */ -static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data) +#define SPAPR_COMPAT_2_2 \ + SPAPR_COMPAT_2_3 \ + HW_COMPAT_2_2 \ + {\ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ + .property = "mem_win_size",\ + .value = "0x20000000",\ + }, + +static void spapr_machine_2_2_instance_options(MachineState *machine) { - static GlobalProperty compat_props[] = { - SPAPR_COMPAT_2_3 - { /* end of list */ } - }; - MachineClass *mc = MACHINE_CLASS(oc); + spapr_machine_2_3_instance_options(machine); + machine->suppress_vmdesc = true; +} - mc->name = "pseries-2.3"; - mc->desc = "pSeries Logical Partition (PAPR compliant) v2.3"; - mc->compat_props = compat_props; +static void spapr_machine_2_2_class_options(MachineClass *mc) +{ + spapr_machine_2_3_class_options(mc); + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_2); } +DEFINE_SPAPR_MACHINE(2_2, "2.2", false); -static const TypeInfo spapr_machine_2_3_info = { - .name = TYPE_SPAPR_MACHINE "2.3", - .parent = TYPE_SPAPR_MACHINE, - .class_init = spapr_machine_2_3_class_init, - .instance_init = spapr_machine_2_3_instance_init, -}; +/* + * pseries-2.1 + */ +#define SPAPR_COMPAT_2_1 \ + SPAPR_COMPAT_2_2 \ + HW_COMPAT_2_1 -static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data) +static void spapr_machine_2_1_instance_options(MachineState *machine) { - MachineClass *mc = MACHINE_CLASS(oc); - - mc->name = "pseries-2.4"; - mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4"; - mc->alias = "pseries"; - mc->is_default = 1; + spapr_machine_2_2_instance_options(machine); } -static const TypeInfo spapr_machine_2_4_info = { - .name = TYPE_SPAPR_MACHINE "2.4", - .parent = TYPE_SPAPR_MACHINE, - .class_init = spapr_machine_2_4_class_init, -}; +static void spapr_machine_2_1_class_options(MachineClass *mc) +{ + spapr_machine_2_2_class_options(mc); + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); +} +DEFINE_SPAPR_MACHINE(2_1, "2.1", false); static void spapr_machine_register_types(void) { type_register_static(&spapr_machine_info); - type_register_static(&spapr_machine_2_1_info); - type_register_static(&spapr_machine_2_2_info); - type_register_static(&spapr_machine_2_3_info); - type_register_static(&spapr_machine_2_4_info); } type_init(spapr_machine_register_types) |