From 613e7a764501a236cdfce39561f9bcf60c78cf49 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Thu, 30 Jul 2015 20:25:15 +0530 Subject: spapr: Provide an error message when migration fails due to htab_shift mismatch Include an error message when migration fails due to mismatch in htab_shift values at source and target. This should provide a bit more verbose message in addition to the current migration failure message that reads like: qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr/htab' After this patch, the failure message will look like this: qemu-system-ppc64: htab_shift mismatch: source 29 target 24 qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr/htab' Signed-off-by: Bharata B Rao Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 05926a3eb2..dd58eb433a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1329,6 +1329,8 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) if (section_hdr) { /* First section, just the hash shift */ if (spapr->htab_shift != section_hdr) { + error_report("htab_shift mismatch: source %d target %d", + section_hdr, spapr->htab_shift); return -EINVAL; } return 0; -- cgit v1.2.3 From fb0fc8f62c16b5b0910545f56c64aaafc91533ce Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 12 Aug 2015 13:15:56 +1000 Subject: spapr: Create pseries-2.5 machine Add pseries-2.5 machine version. Signed-off-by: Bharata B Rao [Altered to merge before memory hotplug -- dwg] [Altered to work with b9f072d01 -- dwg] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index dd58eb433a..df76cb7a39 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1976,7 +1976,7 @@ static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data) mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4"; mc->alias = "pseries"; - mc->is_default = 1; + mc->is_default = 0; } static const TypeInfo spapr_machine_2_4_info = { @@ -1985,6 +1985,22 @@ static const TypeInfo spapr_machine_2_4_info = { .class_init = spapr_machine_2_4_class_init, }; +static void spapr_machine_2_5_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->name = "pseries-2.5"; + mc->desc = "pSeries Logical Partition (PAPR compliant) v2.5"; + mc->alias = "pseries"; + mc->is_default = 1; +} + +static const TypeInfo spapr_machine_2_5_info = { + .name = MACHINE_TYPE_NAME("pseries-2.5"), + .parent = TYPE_SPAPR_MACHINE, + .class_init = spapr_machine_2_5_class_init, +}; + static void spapr_machine_register_types(void) { type_register_static(&spapr_machine_info); @@ -1992,6 +2008,7 @@ static void spapr_machine_register_types(void) type_register_static(&spapr_machine_2_2_info); type_register_static(&spapr_machine_2_3_info); type_register_static(&spapr_machine_2_4_info); + type_register_static(&spapr_machine_2_5_info); } type_init(spapr_machine_register_types) -- cgit v1.2.3 From 2c1aaa819a0d68a51086f5d7e8f9a0114ae0305c Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Tue, 1 Sep 2015 11:23:19 +1000 Subject: spapr: Add /ibm,partition-name QEMU has a notion of the guest name, so if it's present we might as well put that into the device tree as /ibm,partition-name. This is specificed by PAPR. Signed-off-by: Sam Bobroff Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index df76cb7a39..09962c4f8e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -375,6 +375,11 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_string(fdt, "vm,uuid", buf))); g_free(buf); + if (qemu_get_vm_name()) { + _FDT((fdt_property_string(fdt, "ibm,partition-name", + qemu_get_vm_name()))); + } + _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); -- cgit v1.2.3 From a95f99224c08efcf91b4259c34754f69d962bf23 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Tue, 1 Sep 2015 11:23:34 +1000 Subject: spapr: Add /rtas/ibm,change-msix-capable QEMU is MSI-X capable and makes it available via ibm,change-msi, so we should indicate this by adding /rtas/ibm,change-msix-capable to the device tree. This is specificed by PAPR. Signed-off-by: Sam Bobroff Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 09962c4f8e..ed0abd8498 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -432,6 +432,10 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", RTAS_EVENT_SCAN_RATE))); + if (msi_supported) { + _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0))); + } + /* * According to PAPR, rtas ibm,os-term does not guarantee a return * back to the guest cpu. -- cgit v1.2.3 From b359bd6a424b8de7db994d7120e87a7465b69337 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Tue, 1 Sep 2015 11:23:47 +1000 Subject: spapr: Make ibm, change-msi respect 3 return values Currently, rtas_ibm_change_msi() always returns four values even if less are specified. Correct this by only returning the fourth parameter if it was requested. This is specified by PAPR. Signed-off-by: Sam Bobroff Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index a2feb4c985..6782fd0b4d 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -375,7 +375,9 @@ out: rtas_st(rets, 0, RTAS_OUT_SUCCESS); rtas_st(rets, 1, req_num); rtas_st(rets, 2, ++seq_num); - rtas_st(rets, 3, ret_intr_type); + if (nret > 3) { + rtas_st(rets, 3, ret_intr_type); + } trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq); } -- cgit v1.2.3 From e39432282e2d2db42645c2ce183dfcaa1488752b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Tue, 1 Sep 2015 11:24:37 +1000 Subject: spapr: SPLPAR Characteristics Improve the SPLPAR Characteristics information: Add MaxPlatProcs: set to max_cpus, the maximum CPUs that could be addded to the system. Add DesMem: set to the initial memory of the system. Add DesProcs: set to smp_cpus, the inital number of CPUs in the system. These tokens and values are specified by PAPR. Signed-off-by: Sam Bobroff Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_rtas.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 9869bc9587..5cbf9a071a 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -34,6 +34,7 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "qapi-event.h" +#include "hw/boards.h" #include #include "hw/ppc/spapr_drc.h" @@ -240,8 +241,14 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, switch (parameter) { case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: { - char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d", - max_cpus, smp_cpus); + char *param_val = g_strdup_printf("MaxEntCap=%d," + "DesMem=%llu," + "DesProcs=%d," + "MaxPlatProcs=%d", + max_cpus, + current_machine->ram_size / M_BYTE, + smp_cpus, + max_cpus); rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val)); g_free(param_val); break; -- cgit v1.2.3 From ad440b4ae0727dbef5cace419d94d774de96886c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 1 Sep 2015 11:25:35 +1000 Subject: spapr: add dumpdtb support dumpdtb (-machine dumpdtb=) allows one to inspect the generated device tree of machine types that generate device trees. This is useful for a) seeing what's there b) debugging/testing device tree generator patches. It can be used as follows $QEMU_CMDLINE -machine dumpdtb=dtb dtc -I dtb -O dts dtb Signed-off-by: Andrew Jones Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ed0abd8498..a69d7e4754 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -30,6 +30,7 @@ #include "hw/fw-path-provider.h" #include "elf.h" #include "net/net.h" +#include "sysemu/device_tree.h" #include "sysemu/block-backend.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" @@ -831,6 +832,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, exit(1); } + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); g_free(bootlist); -- cgit v1.2.3 From 627c2ef7898794a28d706ecdf094491bebbb083a Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 3 Sep 2015 10:08:23 +1000 Subject: spapr_drc: Fix potential undefined behaviour The DRC_INDEX_ID_MASK macro does a left shift on ~0, which is a signed quantity, and therefore undefined behaviour according to the C spec. In particular this causes warnings from the clang sanitizer. This fixes it by calculating the same mask without using ~0 (I think the new method is a more common idiom for generating masks anyway). For good measure I also use 1ULL to force the expression's type to unsigned long long, which should be good for assigning to anything we're going to want to. Reported-by: Peter Maydell Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_drc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index ee874326ee..8cbcf4d346 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -32,7 +32,7 @@ #define DRC_CONTAINER_PATH "/dr-connector" #define DRC_INDEX_TYPE_SHIFT 28 -#define DRC_INDEX_ID_MASK (~(~0 << DRC_INDEX_TYPE_SHIFT)) +#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1) static sPAPRDRConnectorTypeShift get_type_shift(sPAPRDRConnectorType type) { -- cgit v1.2.3 From aaf87c6616370685a7cff6a21616fc5db7495014 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 1 Sep 2015 11:29:02 +1000 Subject: ppc/spapr: Use qemu_log_mask() for hcall_dprintf() To see the output of the hcall_dprintf statements, you currently have to enable the DEBUG_SPAPR_HCALLS macro in include/hw/ppc/spapr.h. This is ugly because a) not every user who wants to debug guest problems can or wants to recompile QEMU to be able to see such issues, and b) since this macro is disabled by default, the code in the hcall_dprintf() brackets tends to bitrot until somebody temporarily enables that macro again. Since the hcall_dprintf statements except one indicate guest problems, let's always use qemu_log_mask(LOG_GUEST_ERROR, ...) for this macro instead. One spot indicated an unimplemented host feature, so this is changed into qemu_log_mask(LOG_UNIMP, ...) instead. Now it's possible to see all those messages by simply adding the CLI parameter "-d guest_errors,unimp", without the need to re-compile the binary. Signed-off-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_hcall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 652ddf6e37..71fc9f23a1 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -971,7 +971,8 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, } } - hcall_dprintf("Unimplemented hcall 0x" TARGET_FMT_lx "\n", opcode); + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx "\n", + opcode); return H_FUNCTION; } -- cgit v1.2.3 From e6fc9568c865f2f81499475a4e322cd563fdfd90 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 1 Sep 2015 09:53:52 +1000 Subject: spapr_rtas: Prevent QEMU crash during hotplug without a prior device_add If drmgr is used in the guest to hotplug a device before a device_add has been issued via the QEMU monitor, QEMU segfaults in configure_connector call. This occurs due to accessing of NULL FDT which otherwise would have been created and associated with the DRC during device_add command. Check for NULL FDT and return failure from configure_connector call. As per PAPR+, an error value of -9003 seems appropriate for this failure. Signed-off-by: Bharata B Rao Cc: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_rtas.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 5cbf9a071a..2f8e25cfb2 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -522,6 +522,12 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); fdt = drck->get_fdt(drc, NULL); + if (!fdt) { + DPRINTF("rtas_ibm_configure_connector: Missing FDT for DRC index: %xh\n", + drc_index); + rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; + goto out; + } ccs = spapr_ccs_find(spapr, drc_index); if (!ccs) { -- cgit v1.2.3 From a14aa92b20c5482b9b694901304b8100b3c4b5a1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 1 Sep 2015 11:05:12 +1000 Subject: sPAPR: Introduce rtas_ldq() This introduces rtas_ldq() to load 64-bits parameter from continuous two 4-bytes memory chunk of RTAS parameter buffer, to simplify the code. Signed-off-by: Gavin Shan Reviewed-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 6782fd0b4d..54292c9d81 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -140,7 +140,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); size = rtas_ld(args, 3); addr = rtas_ld(args, 0); @@ -206,7 +206,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); val = rtas_ld(args, 4); size = rtas_ld(args, 3); addr = rtas_ld(args, 0); @@ -269,7 +269,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong rets) { uint32_t config_addr = rtas_ld(args, 0); - uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + uint64_t buid = rtas_ldq(args, 1); unsigned int func = rtas_ld(args, 3); unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */ unsigned int seq_num = rtas_ld(args, 5); @@ -391,7 +391,7 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, target_ulong rets) { uint32_t config_addr = rtas_ld(args, 0); - uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + uint64_t buid = rtas_ldq(args, 1); unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); sPAPRPHBState *phb = NULL; PCIDevice *pdev = NULL; @@ -440,7 +440,7 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); addr = rtas_ld(args, 0); option = rtas_ld(args, 3); @@ -484,7 +484,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; @@ -539,7 +539,7 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; @@ -584,7 +584,7 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); option = rtas_ld(args, 3); sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { @@ -619,7 +619,7 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; @@ -654,7 +654,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, goto param_error_exit; } - buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + buid = rtas_ldq(args, 1); sphb = spapr_pci_find_phb(spapr, buid); if (!sphb) { goto param_error_exit; -- cgit v1.2.3 From 785652dc4db2023aeda4e381eb08e0beae67b870 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 13 Aug 2015 14:53:02 +0200 Subject: pseries: define coldplugged devices as "configured" When a device is hotplugged, attach() sets "configured" to false, waiting an action from the OS to configure it and then to call ibm,configure-connector. On ibm,configure-connector, the hypervisor sets "configured" to true. In case of coldplugged device, attach() sets "configured" to false, but firmware and OS never call the ibm,configure-connector in this case, so it remains set to false. It could be harmless, but when we unplug a device, hypervisor waits the device becomes configured because for it, a not configured device is a device being configured, so it waits the end of configuration to unplug it... and it never happens, so it is never unplugged. This patch set by default coldplugged device to "configured=true", hotplugged device to "configured=false". Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 8cbcf4d346..9ce844ab1e 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -310,7 +310,7 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, drc->dev = d; drc->fdt = fdt; drc->fdt_start_offset = fdt_start_offset; - drc->configured = false; + drc->configured = coldplug; object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), -- cgit v1.2.3 From 22419c2a90b859dcab49f9472259ad8a3ce091d6 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 8 Sep 2015 11:21:31 +1000 Subject: pseries: Fix incorrect calculation of threads per socket for chip-id The device tree presented to pseries machine type guests includes an ibm,chip-id property which gives essentially the socket number of each vcpu core (individual vcpu threads don't get a node in the device tree). To calculate this, it uses a vcpus_per_socket variable computed as (smp_cpus / #sockets). This is correct for the usual case where smp_cpus == smp_threads * smp_cores * #sockets. However, you can start QEMU with the number of cores and threads mismatching the total number of vcpus (whether that _should_ be permitted is a topic for another day). It's a bit hard to say what the "real" number of vcpus per socket here is, but for most purposes (smp_threads * smp_cores) will more meaningfully match how QEMU behaves with respect to socket boundaries. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a69d7e4754..ac0d2fe892 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -630,9 +630,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; uint32_t page_sizes_prop[64]; size_t page_sizes_prop_size; - QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); - unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; + uint32_t vcpus_per_socket = smp_threads * smp_cores; uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); @@ -701,7 +699,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, } _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", - cs->cpu_index / cpus_per_socket))); + cs->cpu_index / vcpus_per_socket))); _FDT((fdt_setprop(fdt, offset, "ibm,pft-size", pft_size_prop, sizeof(pft_size_prop)))); -- cgit v1.2.3 From ef9971dd69bdd84b0987b0e1e4f421223b080afd Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 8 Sep 2015 11:25:13 +1000 Subject: spapr: Enable in-kernel H_SET_MODE handling For setting debug watchpoints, sPAPR guests use H_SET_MODE hypercall. The existing QEMU H_SET_MODE handler does not support this but the KVM handler in HV KVM does. However it is not enabled. This enables the in-kernel H_SET_MODE handler which handles: - Completed Instruction Address Breakpoint Register - Watch point 0 registers. The rest is still handled in QEMU. Reported-by: Anton Blanchard Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ac0d2fe892..d49f322fcc 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1546,6 +1546,7 @@ static void ppc_spapr_init(MachineState *machine) if (kvm_enabled()) { /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ kvmppc_enable_logical_ci_hcalls(); + kvmppc_enable_set_mode_hcall(); } /* allocate RAM */ -- cgit v1.2.3 From a8ad731a001d41582c9cec4015f73ab3bc11a28d Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 15 Sep 2015 16:34:59 -0500 Subject: spapr_pci: fix device tree props for MSI/MSI-X PAPR requires ibm,req#msi and ibm,req#msi-x to be present in the device node to define the number of msi/msi-x interrupts the device supports, respectively. Currently we have ibm,req#msi-x hardcoded to a non-sensical constant that happens to be 2, and are missing ibm,req#msi entirely. The result of that is that msi-x capable devices get limited to 2 msi-x interrupts (which can impact performance), and msi-only devices likely wouldn't work at all. Additionally, if devices expect a minimum that exceeds 2, the guest driver may fail to load entirely. SLOF still owns the generation of these properties at boot-time (although other device properties have since been offloaded to QEMU), but for hotplugged devices we rely on the values generated by QEMU and thus hit the limitations above. Fix this by generating these properties in QEMU as expected by guests. In the future it may make sense to modify SLOF to pass through these values directly as we do with other props since we're duplicating SLOF code. Cc: qemu-ppc@nongnu.org Cc: qemu-stable@nongnu.org Cc: David Gibson Cc: Nikunj A Dadhania Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 54292c9d81..d2188c854d 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -957,6 +957,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, int pci_status, err; char *buf = NULL; uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); + uint32_t max_msi, max_msix; if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == PCI_HEADER_TYPE_BRIDGE) { @@ -1037,8 +1038,15 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, RESOURCE_CELLS_ADDRESS)); _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", RESOURCE_CELLS_SIZE)); - _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", - RESOURCE_CELLS_SIZE)); + + max_msi = msi_nr_vectors_allocated(dev); + if (max_msi) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi)); + } + max_msix = dev->msix_entries_nr; + if (max_msix) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix)); + } populate_resource_props(dev, &rp); _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); -- cgit v1.2.3 From 9d1852ce11c888e3ad5096be505d14045d8b49ae Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 10 Sep 2015 16:11:03 -0500 Subject: spapr_drc: don't allow 'empty' DRCs to be unisolated or allocated Logical resources start with allocation-state:UNUSABLE / isolation-state:ISOLATED. During hotplug, guests will transition them to allocation-state:USABLE, and then to isolation-state:UNISOLATED. For cases where we cannot transition to allocation-state:USABLE, in this case due to no device/resource being association with the logical DRC, we should return an error -3. For physical DRCs, we default to allocation-state:USABLE and stay there, so in this case we should report an error -3 when the guest attempts to make the isolation-state:ISOLATED transition for a DRC with no device associated. These are as documented in PAPR 2.7, 13.5.3.4. We also ensure allocation-state:USABLE when the guest attempts transition to isolation-state:UNISOLATED to deal with misbehaving guests attempting to bring online an unallocated logical resource. This is as documented in PAPR 2.7, 13.7. Currently we implement no such error logic. Fix this by handling these error cases as PAPR defines. Cc: Bharata B Rao Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9ce844ab1e..faf8760d2e 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -66,6 +66,16 @@ static int set_isolation_state(sPAPRDRConnector *drc, DPRINTFN("drc: %x, set_isolation_state: %x", get_index(drc), state); + if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { + /* cannot unisolate a non-existant resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, 13.5.3.5) + */ + if (!drc->dev || + drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + } + drc->isolation_state = state; if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { @@ -107,6 +117,17 @@ static int set_allocation_state(sPAPRDRConnector *drc, DPRINTFN("drc: %x, set_allocation_state: %x", get_index(drc), state); + if (state == SPAPR_DR_ALLOCATION_STATE_USABLE) { + /* if there's no resource/device associated with the DRC, there's + * no way for us to put it in an allocation state consistent with + * being 'USABLE'. PAPR 2.7, 13.5.3.4 documents that this should + * result in an RTAS return code of -3 / "no such indicator" + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + } + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) { drc->allocation_state = state; if (drc->awaiting_release && -- cgit v1.2.3 From 4a1c9cf0073e733b421e7b82ad673e7cf6ed8454 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 14:14:27 +0530 Subject: spapr: Initialize hotplug memory address space Initialize a hotplug memory region under which all the hotplugged memory is accommodated. Also enable memory hotplug by setting CONFIG_MEM_HOTPLUG. Modelled on i386 memory hotplug. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d49f322fcc..2fb5e3643c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1562,6 +1562,24 @@ static void ppc_spapr_init(MachineState *machine) memory_region_add_subregion(sysmem, 0, rma_region); } + /* initialize hotplug memory address space */ + if (machine->ram_size < machine->maxram_size) { + ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size; + + if (machine->ram_slots > SPAPR_MAX_RAM_SLOTS) { + error_report("unsupported amount of memory slots: %"PRIu64, + machine->ram_slots); + exit(EXIT_FAILURE); + } + + spapr->hotplug_memory.base = ROUND_UP(machine->ram_size, + SPAPR_HOTPLUG_MEM_ALIGN); + memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr), + "hotplug-memory", hotplug_mem_size); + memory_region_add_subregion(sysmem, spapr->hotplug_memory.base, + &spapr->hotplug_memory.mr); + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); if (!filename) { error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin"); -- cgit v1.2.3 From 0cb688d22b3941af02fee78ba21dc3a39c367e0b Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 10 Sep 2015 16:11:02 -0500 Subject: spapr_drc: use RTAS return codes for methods called by RTAS Certain methods in sPAPRDRConnector objects are only ever called by RTAS and in many cases are responsible for the logic that determines the RTAS return codes. Rather than having a level of indirection requiring RTAS code to re-interpret return values from such methods to determine the appropriate return code, just pass them through directly. This requires changing method return types to uint32_t to match the type of values currently passed to RTAS helpers. In the case of read accesses like drc->entity_sense() where we weren't previously reporting any errors, just the read value, we modify the function to return RTAS return code, and pass the read value back via reference. Suggested-by: Bharata B Rao Suggested-by: David Gibson Cc: Bharata B Rao Signed-off-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_drc.c | 37 +++++++++++++++++++------------------ hw/ppc/spapr_rtas.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 41 insertions(+), 36 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index faf8760d2e..b7b9891714 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -15,6 +15,7 @@ #include "hw/qdev.h" #include "qapi/visitor.h" #include "qemu/error-report.h" +#include "hw/ppc/spapr.h" /* for RTAS return codes */ /* #define DEBUG_SPAPR_DRC */ @@ -59,8 +60,8 @@ static uint32_t get_index(sPAPRDRConnector *drc) (drc->id & DRC_INDEX_ID_MASK); } -static int set_isolation_state(sPAPRDRConnector *drc, - sPAPRDRIsolationState state) +static uint32_t set_isolation_state(sPAPRDRConnector *drc, + sPAPRDRIsolationState state) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -99,19 +100,19 @@ static int set_isolation_state(sPAPRDRConnector *drc, drc->configured = false; } - return 0; + return RTAS_OUT_SUCCESS; } -static int set_indicator_state(sPAPRDRConnector *drc, - sPAPRDRIndicatorState state) +static uint32_t set_indicator_state(sPAPRDRConnector *drc, + sPAPRDRIndicatorState state) { DPRINTFN("drc: %x, set_indicator_state: %x", get_index(drc), state); drc->indicator_state = state; - return 0; + return RTAS_OUT_SUCCESS; } -static int set_allocation_state(sPAPRDRConnector *drc, - sPAPRDRAllocationState state) +static uint32_t set_allocation_state(sPAPRDRConnector *drc, + sPAPRDRAllocationState state) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -137,7 +138,7 @@ static int set_allocation_state(sPAPRDRConnector *drc, drc->detach_cb_opaque, NULL); } } - return 0; + return RTAS_OUT_SUCCESS; } static uint32_t get_type(sPAPRDRConnector *drc) @@ -178,10 +179,8 @@ static void set_configured(sPAPRDRConnector *drc) * based on the current allocation/indicator/power states * for the DR connector. */ -static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc) +static uint32_t entity_sense(sPAPRDRConnector *drc, sPAPRDREntitySense *state) { - sPAPRDREntitySense state; - if (drc->dev) { if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { @@ -190,7 +189,7 @@ static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc) * Otherwise, report the state as USABLE/PRESENT, * as we would for PCI. */ - state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; + *state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; } else { /* this assumes all PCI devices are assigned to * a 'live insertion' power domain, where QEMU @@ -198,21 +197,21 @@ static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc) * to the guest. present, non-PCI resources are * unaffected by power state. */ - state = SPAPR_DR_ENTITY_SENSE_PRESENT; + *state = SPAPR_DR_ENTITY_SENSE_PRESENT; } } else { if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) { /* PCI devices, and only PCI devices, use EMPTY * in cases where we'd otherwise use UNUSABLE */ - state = SPAPR_DR_ENTITY_SENSE_EMPTY; + *state = SPAPR_DR_ENTITY_SENSE_EMPTY; } else { - state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; + *state = SPAPR_DR_ENTITY_SENSE_UNUSABLE; } } DPRINTFN("drc: %x, entity_sense: %x", get_index(drc), state); - return state; + return RTAS_OUT_SUCCESS; } static void prop_get_index(Object *obj, Visitor *v, void *opaque, @@ -245,7 +244,9 @@ static void prop_get_entity_sense(Object *obj, Visitor *v, void *opaque, { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj); sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - uint32_t value = (uint32_t)drck->entity_sense(drc); + uint32_t value; + + drck->entity_sense(drc, &value); visit_type_uint32(v, &value, name, errp); } diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 2f8e25cfb2..34b12a3b99 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -372,12 +372,13 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t sensor_type; uint32_t sensor_index; uint32_t sensor_state; + uint32_t ret = RTAS_OUT_SUCCESS; sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; if (nargs != 3 || nret != 1) { - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); - return; + ret = RTAS_OUT_PARAM_ERROR; + goto out; } sensor_type = rtas_ld(args, 0); @@ -393,8 +394,8 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, if (!drc) { DPRINTF("rtas_set_indicator: invalid sensor/DRC index: %xh\n", sensor_index); - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); - return; + ret = RTAS_OUT_PARAM_ERROR; + goto out; } drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -413,19 +414,20 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, spapr_ccs_remove(spapr, ccs); } } - drck->set_isolation_state(drc, sensor_state); + ret = drck->set_isolation_state(drc, sensor_state); break; case RTAS_SENSOR_TYPE_DR: - drck->set_indicator_state(drc, sensor_state); + ret = drck->set_indicator_state(drc, sensor_state); break; case RTAS_SENSOR_TYPE_ALLOCATION_STATE: - drck->set_allocation_state(drc, sensor_state); + ret = drck->set_allocation_state(drc, sensor_state); break; default: goto out_unimplemented; } - rtas_st(rets, 0, RTAS_OUT_SUCCESS); +out: + rtas_st(rets, 0, ret); return; out_unimplemented: @@ -442,13 +444,14 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, { uint32_t sensor_type; uint32_t sensor_index; + uint32_t sensor_state = 0; sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; - uint32_t entity_sense; + uint32_t ret = RTAS_OUT_SUCCESS; if (nargs != 2 || nret != 2) { - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); - return; + ret = RTAS_OUT_PARAM_ERROR; + goto out; } sensor_type = rtas_ld(args, 0); @@ -458,22 +461,23 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* currently only DR-related sensors are implemented */ DPRINTF("rtas_get_sensor_state: sensor/indicator not implemented: %d\n", sensor_type); - rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); - return; + ret = RTAS_OUT_NOT_SUPPORTED; + goto out; } drc = spapr_dr_connector_by_index(sensor_index); if (!drc) { DPRINTF("rtas_get_sensor_state: invalid sensor/DRC index: %xh\n", sensor_index); - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); - return; + ret = RTAS_OUT_PARAM_ERROR; + goto out; } drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - entity_sense = drck->entity_sense(drc); + ret = drck->entity_sense(drc, &sensor_state); - rtas_st(rets, 0, RTAS_OUT_SUCCESS); - rtas_st(rets, 1, entity_sense); +out: + rtas_st(rets, 0, ret); + rtas_st(rets, 1, sensor_state); } /* configure-connector work area offsets, int32_t units for field -- cgit v1.2.3 From 94649d423e4647fca3bc3e8b2b363d6d2adee9ce Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 16 Sep 2015 16:57:51 +1000 Subject: spapr: Don't use QOM [*] syntax for DR connectors. The dynamic reconfiguration (hotplug) code for the pseries machine type uses a "DR connector" QOM object for each resource it will be possible to hotplug. Each of these is added to its owner using object_property_add_child(owner, "dr-connector[*], ...); That works ok, mostly, but it means that the property indices are arbitrary, depending on the order in which the connectors are constructed. That might line up to something useful, but it doesn't have to. It will get worse once we add hotplug RAM support. That will add a DR connector object for every 256MB of potential memory. So if maxmem=2T, for example, there are 8192 objects under the same parent. The QOM interfaces aren't really designed for this. In particular object_property_add() with [*] has O(n^2) time complexity (in the number of existing children): first it has a linear search through array indices to find a free slot, each of which is attempted to a recursive call to object_property_add() with a specific [N]. Those calls are O(n) because there's a linear search through all properties to check for duplicates. By using a meaningful index value, which we already know is unique we can avoid the [*] special behaviour. That lets us reduce the total time for creating the DR objects from O(n^3) to O(n^2). O(n^2) is still kind of crappy, but it's enough to reduce the startup time of qemu (with in-progress memory hotplug support) with maxmem=2T from ~20 minutes to ~4 seconds. Signed-off-by: David Gibson Cc: Bharata B Rao Tested-by: Bharata B Rao Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr_drc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index b7b9891714..5d6ea7ce41 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -473,14 +473,17 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, { sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(object_new(TYPE_SPAPR_DR_CONNECTOR)); + char *prop_name; g_assert(type); drc->type = type; drc->id = id; drc->owner = owner; - object_property_add_child(owner, "dr-connector[*]", OBJECT(drc), NULL); + prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", get_index(drc)); + object_property_add_child(owner, prop_name, OBJECT(drc), NULL); object_property_set_bool(OBJECT(drc), true, "realized", NULL); + g_free(prop_name); /* human-readable name for a DRC to encode into the DT * description. this is mainly only used within a guest in place -- cgit v1.2.3 From 38b02bd846672f33bc2eabcb9847c4b78069e097 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 6 Aug 2015 13:37:24 +1000 Subject: spapr: Use QEMU limit for maximum CPUs number sPAPR uses hard coded limit of maximum 255 supported CPUs which is exactly the same as QEMU-wide limit which is MAX_CPUMASK_BITS and also defined as 255. This makes use of a global CPU number limit for the "pseries" machine. In order to anticipate future increase of the MAX_CPUMASK_BITS (or to help debugging large systems), this also bumps the FDT_MAX_SIZE limit from 256K to 1M assuming that 1 CPU core needs roughly 512 bytes in the device tree so the new limit can cover up to 2048 CPU cores. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2fb5e3643c..33e22cea65 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -74,7 +74,7 @@ * * We load our kernel at 4M, leaving space for SLOF initial image */ -#define FDT_MAX_SIZE 0x40000 +#define FDT_MAX_SIZE 0x100000 #define RTAS_MAX_SIZE 0x10000 #define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */ #define FW_MAX_SIZE 0x400000 @@ -86,8 +86,6 @@ #define TIMEBASE_FREQ 512000000ULL -#define MAX_CPUS 255 - #define PHANDLE_XICP 0x00001111 #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) @@ -1859,7 +1857,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = MAX_CPUS; + mc->max_cpus = MAX_CPUMASK_BITS; mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * M_BYTE; -- cgit v1.2.3 From 224245bf524189789d231f38434c9f8fd57a249c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 12 Aug 2015 13:16:48 +1000 Subject: spapr: Add LMB DR connectors Enable memory hotplug for pseries 2.4 and add LMB DR connectors. With memory hotplug, enforce RAM size, NUMA node memory size and maxmem to be a multiple of SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity in which LMBs are represented and hot-added. LMB DR connectors will be used by the memory hotplug code. Signed-off-by: Bharata B Rao Signed-off-by: Michael Roth [spapr_drc_reset implementation] [since this missed the 2.4 cutoff, changing to only enable for 2.5] Signed-off-by: David Gibson --- hw/ppc/spapr.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 33e22cea65..940a82fc71 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -61,6 +61,7 @@ #include "hw/nmi.h" #include "hw/compat.h" +#include "qemu-common.h" #include @@ -1446,10 +1447,84 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu) qemu_register_reset(spapr_cpu_reset, cpu); } +/* + * Reset routine for LMB DR devices. + * + * Unlike PCI DR devices, LMB DR devices explicitly register this reset + * routine. Reset for PCI DR devices will be handled by PHB reset routine + * when it walks all its children devices. LMB devices reset occurs + * as part of spapr_ppc_reset(). + */ +static void spapr_drc_reset(void *opaque) +{ + sPAPRDRConnector *drc = opaque; + DeviceState *d = DEVICE(drc); + + if (d) { + device_reset(d); + } +} + +static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr) +{ + MachineState *machine = MACHINE(spapr); + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; + uint32_t nr_lmbs = machine->maxram_size/lmb_size - nr_rma_lmbs; + uint32_t nr_assigned_lmbs = machine->ram_size/lmb_size - nr_rma_lmbs; + int i; + + for (i = 0; i < nr_lmbs; i++) { + sPAPRDRConnector *drc; + uint64_t addr; + + if (i < nr_assigned_lmbs) { + addr = (i + nr_rma_lmbs) * lmb_size; + } else { + addr = (i - nr_assigned_lmbs) * lmb_size + + spapr->hotplug_memory.base; + } + drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/lmb_size); + qemu_register_reset(spapr_drc_reset, drc); + } +} + +/* + * If RAM size, maxmem size and individual node mem sizes aren't aligned + * to SPAPR_MEMORY_BLOCK_SIZE(256MB), then refuse to start the guest + * since we can't support such unaligned sizes with DRCONF_MEMORY. + */ +static void spapr_validate_node_memory(MachineState *machine) +{ + int i; + + if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE || + machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) { + error_report("Can't support memory configuration where RAM size " + "0x" RAM_ADDR_FMT " or maxmem size " + "0x" RAM_ADDR_FMT " isn't aligned to %llu MB", + machine->ram_size, machine->maxram_size, + SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); + exit(EXIT_FAILURE); + } + + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) { + error_report("Can't support memory configuration where memory size" + " %" PRIx64 " of node %d isn't aligned to %llu MB", + numa_info[i].node_mem, i, + SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); + exit(EXIT_FAILURE); + } + } +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(MachineState *machine) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; @@ -1528,6 +1603,10 @@ static void ppc_spapr_init(MachineState *machine) smp_threads), XICS_IRQS); + if (smc->dr_lmb_enabled) { + spapr_validate_node_memory(machine); + } + /* init CPUs */ if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; @@ -1578,6 +1657,10 @@ static void ppc_spapr_init(MachineState *machine) &spapr->hotplug_memory.mr); } + if (smc->dr_lmb_enabled) { + spapr_create_lmb_dr_connectors(spapr); + } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); if (!filename) { error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin"); @@ -1851,6 +1934,7 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); @@ -1865,6 +1949,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->has_dynamic_sysbus = true; mc->pci_allow_0_address = true; + smc->dr_lmb_enabled = false; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; } @@ -2014,11 +2099,13 @@ static const TypeInfo spapr_machine_2_4_info = { static void spapr_machine_2_5_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); mc->name = "pseries-2.5"; mc->desc = "pSeries Logical Partition (PAPR compliant) v2.5"; mc->alias = "pseries"; mc->is_default = 1; + smc->dr_lmb_enabled = true; } static const TypeInfo spapr_machine_2_5_info = { -- cgit v1.2.3 From 03d196b7c57f22f796197f221f9d95336debee9e Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 13 Jul 2015 10:34:00 +1000 Subject: spapr: Support ibm,dynamic-reconfiguration-memory Parse ibm,architecture.vec table obtained from the guest and enable memory node configuration via ibm,dynamic-reconfiguration-memory if guest supports it. This is in preparation to support memory hotplug for sPAPR guests. This changes the way memory node configuration is done. Currently all memory nodes are built upfront. But after this patch, only memory@0 node for RMA is built upfront. Guest kernel boots with just that and rest of the memory nodes (via memory@XXX or ibm,dynamic-reconfiguration-memory) are built when guest does ibm,client-architecture-support call. Note: This patch needs a SLOF enhancement which is already part of SLOF binary in QEMU. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 210 ++++++++++++++++++++++++++++++++++++++++----------- hw/ppc/spapr_hcall.c | 51 +++++++++++-- 2 files changed, 212 insertions(+), 49 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 940a82fc71..2f49f97924 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -504,44 +504,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, return fdt; } -int spapr_h_cas_compose_response(sPAPRMachineState *spapr, - target_ulong addr, target_ulong size) -{ - void *fdt, *fdt_skel; - sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; - - size -= sizeof(hdr); - - /* Create sceleton */ - fdt_skel = g_malloc0(size); - _FDT((fdt_create(fdt_skel, size))); - _FDT((fdt_begin_node(fdt_skel, ""))); - _FDT((fdt_end_node(fdt_skel))); - _FDT((fdt_finish(fdt_skel))); - fdt = g_malloc0(size); - _FDT((fdt_open_into(fdt_skel, fdt, size))); - g_free(fdt_skel); - - /* Fix skeleton up */ - _FDT((spapr_fixup_cpu_dt(fdt, spapr))); - - /* Pack resulting tree */ - _FDT((fdt_pack(fdt))); - - if (fdt_totalsize(fdt) + sizeof(hdr) > size) { - trace_spapr_cas_failed(size); - return -1; - } - - cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); - cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); - trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); - g_free(fdt); - - return 0; -} - -static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, +static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, hwaddr size) { uint32_t associativity[] = { @@ -564,6 +527,7 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, sizeof(mem_reg_property)))); _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity, sizeof(associativity)))); + return off; } static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) @@ -595,7 +559,6 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) } if (!mem_start) { /* ppc_spapr_init() checks for rma_size <= node0_size already */ - spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); mem_start += spapr->rma_size; node_size -= spapr->rma_size; } @@ -745,6 +708,154 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr) } +/* + * Adds ibm,dynamic-reconfiguration-memory node. + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * of this device tree node. + */ +static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int ret, i, offset; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; + uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; + uint32_t nr_lmbs = machine->maxram_size/lmb_size - nr_rma_lmbs; + uint32_t nr_assigned_lmbs = machine->ram_size/lmb_size - nr_rma_lmbs; + uint32_t *int_buf, *cur_index, buf_len; + + /* Allocate enough buffer size to fit in ibm,dynamic-memory */ + buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) + + sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); + + ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size, + sizeof(prop_lmb_size)); + if (ret < 0) { + goto out; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); + if (ret < 0) { + goto out; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0); + if (ret < 0) { + goto out; + } + + /* ibm,dynamic-memory */ + int_buf[0] = cpu_to_be32(nr_lmbs); + cur_index++; + for (i = 0; i < nr_lmbs; i++) { + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint64_t addr; + uint32_t *dynamic_memory = cur_index; + + if (i < nr_assigned_lmbs) { + addr = (i + nr_rma_lmbs) * lmb_size; + } else { + addr = (i - nr_assigned_lmbs) * lmb_size + + spapr->hotplug_memory.base; + } + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/lmb_size); + g_assert(drc); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); + if (addr < machine->ram_size || + memory_region_present(get_system_memory(), addr)) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } else { + dynamic_memory[5] = cpu_to_be32(0); + } + + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; + } + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); + if (ret < 0) { + goto out; + } + + /* ibm,associativity-lookup-arrays */ + cur_index = int_buf; + int_buf[0] = cpu_to_be32(nb_numa_nodes); + int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */ + cur_index += 2; + for (i = 0; i < nb_numa_nodes; i++) { + uint32_t associativity[] = { + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(i) + }; + memcpy(cur_index, associativity, sizeof(associativity)); + cur_index += 4; + } + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, + (cur_index - int_buf) * sizeof(uint32_t)); +out: + g_free(int_buf); + return ret; +} + +int spapr_h_cas_compose_response(sPAPRMachineState *spapr, + target_ulong addr, target_ulong size, + bool cpu_update, bool memory_update) +{ + void *fdt, *fdt_skel; + sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); + + size -= sizeof(hdr); + + /* Create sceleton */ + fdt_skel = g_malloc0(size); + _FDT((fdt_create(fdt_skel, size))); + _FDT((fdt_begin_node(fdt_skel, ""))); + _FDT((fdt_end_node(fdt_skel))); + _FDT((fdt_finish(fdt_skel))); + fdt = g_malloc0(size); + _FDT((fdt_open_into(fdt_skel, fdt, size))); + g_free(fdt_skel); + + /* Fixup cpu nodes */ + if (cpu_update) { + _FDT((spapr_fixup_cpu_dt(fdt, spapr))); + } + + /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */ + if (memory_update && smc->dr_lmb_enabled) { + _FDT((spapr_populate_drconf_memory(spapr, fdt))); + } else { + _FDT((spapr_populate_memory(spapr, fdt))); + } + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + if (fdt_totalsize(fdt) + sizeof(hdr) > size) { + trace_spapr_cas_failed(size); + return -1; + } + + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); + g_free(fdt); + + return 0; +} + static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr fdt_addr, hwaddr rtas_addr, @@ -763,10 +874,23 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, /* open out the base tree into a temp buffer for the final tweaks */ _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); - ret = spapr_populate_memory(spapr, fdt); - if (ret < 0) { - fprintf(stderr, "couldn't setup memory nodes in fdt\n"); - exit(1); + /* + * Add memory@0 node to represent RMA. Rest of the memory is either + * represented by memory nodes or ibm,dynamic-reconfiguration-memory + * node later during ibm,client-architecture-support call. + * + * If NUMA is configured, ensure that memory@0 ends up in the + * first memory-less node. + */ + if (nb_numa_nodes) { + for (i = 0; i < nb_numa_nodes; ++i) { + if (numa_info[i].node_mem) { + spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); + break; + } + } + } else { + spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); } ret = spapr_populate_vdevice(spapr->vio_bus, fdt); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 71fc9f23a1..cebceea69b 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -808,6 +808,32 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, return ret; } +/* + * Return the offset to the requested option vector @vector in the + * option vector table @table. + */ +static target_ulong cas_get_option_vector(int vector, target_ulong table) +{ + int i; + char nr_vectors, nr_entries; + + if (!table) { + return 0; + } + + nr_vectors = (ldl_phys(&address_space_memory, table) >> 24) + 1; + if (!vector || vector > nr_vectors) { + return 0; + } + table++; /* skip nr option vectors */ + + for (i = 0; i < vector - 1; i++) { + nr_entries = ldl_phys(&address_space_memory, table) >> 24; + table += nr_entries + 2; + } + return table; +} + typedef struct { PowerPCCPU *cpu; uint32_t cpu_version; @@ -828,19 +854,22 @@ static void do_set_compat(void *arg) ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \ ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) +#define OV5_DRCONF_MEMORY 0x20 + static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { - target_ulong list = args[0]; + target_ulong list = args[0], ov_table; PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_); CPUState *cs; - bool cpu_match = false; + bool cpu_match = false, cpu_update = true, memory_update = false; unsigned old_cpu_version = cpu_->cpu_version; unsigned compat_lvl = 0, cpu_version = 0; unsigned max_lvl = get_compat_level(cpu_->max_compat); int counter; + char ov5_byte2; /* Parse PVR list */ for (counter = 0; counter < 512; ++counter) { @@ -890,8 +919,6 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, } } - /* For the future use: here @list points to the first capability */ - /* Parsing finished */ trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match, cpu_version, pcc_->pcr_mask); @@ -915,14 +942,26 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, } if (!cpu_version) { - return H_SUCCESS; + cpu_update = false; } + /* For the future use: here @ov_table points to the first option vector */ + ov_table = list; + + list = cas_get_option_vector(5, ov_table); if (!list) { return H_SUCCESS; } - if (spapr_h_cas_compose_response(spapr, args[1], args[2])) { + /* @list now points to OV 5 */ + list += 2; + ov5_byte2 = rtas_ld(list, 0) >> 24; + if (ov5_byte2 & OV5_DRCONF_MEMORY) { + memory_update = true; + } + + if (spapr_h_cas_compose_response(spapr, args[1], args[2], + cpu_update, memory_update)) { qemu_system_reset_request(); } -- cgit v1.2.3 From ce881f774d69d941eb999c25f0cb1f72cd228795 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 14:14:30 +0530 Subject: spapr: Make hash table size a factor of maxram_size The hash table size is dependent on ram_size, but since with hotplug the memory can grow till maxram_size. Hence make hash table size dependent on maxram_size. This allows to hotplug huge amounts of memory to the guest. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2f49f97924..d21e95cf6b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1715,7 +1715,7 @@ static void ppc_spapr_init(MachineState *machine) * more than needed for the Linux guests we support. */ spapr->htab_shift = 18; /* Minimum architected size */ while (spapr->htab_shift <= 46) { - if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) { + if ((1ULL << (spapr->htab_shift + 7)) >= machine->maxram_size) { break; } spapr->htab_shift++; -- cgit v1.2.3 From c20d332a85c95245e3b720bfea1bd02e3a311463 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 1 Sep 2015 11:22:35 +1000 Subject: spapr: Memory hotplug support Make use of pc-dimm infrastructure to support memory hotplug for PowerPC. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_events.c | 8 ++-- 2 files changed, 123 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d21e95cf6b..5b462290a5 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -34,6 +34,7 @@ #include "sysemu/block-backend.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" +#include "sysemu/device_tree.h" #include "kvm_ppc.h" #include "migration/migration.h" #include "mmu-hash64.h" @@ -862,6 +863,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr rtas_size) { MachineState *machine = MACHINE(qdev_get_machine()); + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *boot_device = machine->boot_order; int ret, i; size_t cb = 0; @@ -945,6 +947,10 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, spapr_populate_chosen_stdout(fdt, spapr->vio_bus); } + if (smc->dr_lmb_enabled) { + _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); + } + _FDT((fdt_pack(fdt))); if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { @@ -2055,12 +2061,120 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) } } +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, + uint32_t node, Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; + int i, fdt_offset, fdt_size; + void *fdt; + + /* + * Check for DRC connectors and send hotplug notification to the + * guest only in case of hotplugged memory. This allows cold plugged + * memory to be specified at boot time. + */ + if (!dev->hotplugged) { + return; + } + + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr/SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + fdt = create_device_tree(&fdt_size); + fdt_offset = spapr_populate_memory_node(fdt, node, addr, + SPAPR_MEMORY_BLOCK_SIZE); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); + spapr_hotplug_req_add_event(drc); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } +} + +static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + uint32_t node, Error **errp) +{ + Error *local_err = NULL; + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t align = memory_region_get_alignment(mr); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(&local_err, "Hotplugged memory size must be a multiple of " + "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE); + goto out; + } + + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); + if (local_err) { + goto out; + } + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + goto out; + } + + spapr_add_lmbs(dev, addr, size, node, &error_abort); + +out: + error_propagate(errp, local_err); +} + +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + uint32_t node; + + if (!smc->dr_lmb_enabled) { + error_setg(errp, "Memory hotplug not supported for this machine"); + return; + } + node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, errp); + if (*errp) { + return; + } + + spapr_memory_plug(hotplug_dev, dev, node, errp); + } +} + +static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + error_setg(errp, "Memory hot unplug not supported by sPAPR"); + } +} + +static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc); FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; @@ -2072,6 +2186,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->kvm_type = spapr_kvm_type; mc->has_dynamic_sysbus = true; mc->pci_allow_0_address = true; + mc->get_hotplug_handler = spapr_get_hotpug_handler; + hc->plug = spapr_machine_device_plug; + hc->unplug = spapr_machine_device_unplug; smc->dr_lmb_enabled = false; fwc->get_dev_path = spapr_get_fw_dev_path; @@ -2089,6 +2206,7 @@ static const TypeInfo spapr_machine_info = { .interfaces = (InterfaceInfo[]) { { TYPE_FW_PATH_PROVIDER }, { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER }, { } }, }; diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f626eb7b34..98bf7aedfc 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -427,14 +427,16 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); hp->hdr.section_version = 1; /* includes extended modifier */ hp->hotplug_action = hp_action; - + hp->drc.index = cpu_to_be32(drck->get_index(drc)); + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; switch (drc_type) { case SPAPR_DR_CONNECTOR_TYPE_PCI: - hp->drc.index = cpu_to_be32(drck->get_index(drc)); - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; + break; default: /* we shouldn't be signaling hotplug events for resources * that don't support them -- cgit v1.2.3 From b556854bd8524c26b8be98ab1bfdf0826831e793 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 29 Jun 2015 14:14:32 +0530 Subject: spapr: Don't allow memory hotplug to memory less nodes Currently PowerPC kernel doesn't allow hot-adding memory to memory-less node, but instead will silently add the memory to the first node that has some memory. This causes two unexpected behaviours for the user. - Memory gets hotplugged to a different node than what the user specified. - Since pc-dimm subsystem in QEMU still thinks that memory belongs to memory-less node, a reboot will set things accordingly and the previously hotplugged memory now ends in the right node. This appears as if some memory moved from one node to another. So until kernel starts supporting memory hotplug to memory-less nodes, just prevent such attempts upfront in QEMU. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 5b462290a5..1170028578 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2136,7 +2136,7 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - uint32_t node; + int node; if (!smc->dr_lmb_enabled) { error_setg(errp, "Memory hotplug not supported for this machine"); @@ -2147,6 +2147,28 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, return; } + /* + * Currently PowerPC kernel doesn't allow hot-adding memory to + * memory-less node, but instead will silently add the memory + * to the first node that has some memory. This causes two + * unexpected behaviours for the user. + * + * - Memory gets hotplugged to a different node than what the user + * specified. + * - Since pc-dimm subsystem in QEMU still thinks that memory belongs + * to memory-less node, a reboot will set things accordingly + * and the previously hotplugged memory now ends in the right node. + * This appears as if some memory moved from one node to another. + * + * So until kernel starts supporting memory hotplug to memory-less + * nodes, just prevent such attempts upfront in QEMU. + */ + if (nb_numa_nodes && !numa_info[node].node_mem) { + error_setg(errp, "Can't hotplug memory to memory-less node %d", + node); + return; + } + spapr_memory_plug(hotplug_dev, dev, node, errp); } } -- cgit v1.2.3 From 19a35c9e1b964f420ee07141f049e6c96c63b740 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 3 Aug 2015 11:05:39 +0530 Subject: spapr: Provide better error message when slots exceed max allowed Currently when user specifies more slots than allowed max of SPAPR_MAX_RAM_SLOTS (32), we error out like this: qemu-system-ppc64: unsupported amount of memory slots: 64 Let the user know about the max allowed slots like this: qemu-system-ppc64: Specified number of memory slots 64 exceeds max supported 32 Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1170028578..5c81e19ac7 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1774,8 +1774,8 @@ static void ppc_spapr_init(MachineState *machine) ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size; if (machine->ram_slots > SPAPR_MAX_RAM_SLOTS) { - error_report("unsupported amount of memory slots: %"PRIu64, - machine->ram_slots); + error_report("Specified number of memory slots %"PRIu64" exceeds max supported %d\n", + machine->ram_slots, SPAPR_MAX_RAM_SLOTS); exit(EXIT_FAILURE); } -- cgit v1.2.3 From 6663864e950d40c467ae4ab81c4dac64d7a8d9e6 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 3 Aug 2015 11:05:40 +0530 Subject: spapr: Populate ibm,associativity-lookup-arrays correctly for non-NUMA When NUMA isn't configured explicitly, assume node 0 is present for the purpose of creating ibm,associativity-lookup-arrays property under ibm,dynamic-reconfiguration-memory DT node. This ensures that the associativity index property is correctly updated in ibm,dynamic-memory for the LMB that is hotplugged. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 5c81e19ac7..64d07cd634 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -724,6 +724,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) uint32_t nr_lmbs = machine->maxram_size/lmb_size - nr_rma_lmbs; uint32_t nr_assigned_lmbs = machine->ram_size/lmb_size - nr_rma_lmbs; uint32_t *int_buf, *cur_index, buf_len; + int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; /* Allocate enough buffer size to fit in ibm,dynamic-memory */ buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) + @@ -789,10 +790,10 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) /* ibm,associativity-lookup-arrays */ cur_index = int_buf; - int_buf[0] = cpu_to_be32(nb_numa_nodes); + int_buf[0] = cpu_to_be32(nr_nodes); int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */ cur_index += 2; - for (i = 0; i < nb_numa_nodes; i++) { + for (i = 0; i < nr_nodes; i++) { uint32_t associativity[] = { cpu_to_be32(0x0), cpu_to_be32(0x0), -- cgit v1.2.3 From e8f986fc57a664a74b9f685b466506366a15201b Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 3 Aug 2015 11:05:41 +0530 Subject: spapr: Revert to memory@XXXX representation for non-hotplugged memory Don't represent non-hotluggable memory under drconf node. With this we don't have to create DRC objects for them. The effect of this patch is that we revert back to memory@XXXX representation for all the memory specified with -m option and represent the cold plugged memory and hot-pluggable memory under ibm,dynamic-reconfiguration-memory. Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 47 +++++++++-------------------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 64d07cd634..402c00d24e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -560,6 +560,7 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) } if (!mem_start) { /* ppc_spapr_init() checks for rma_size <= node0_size already */ + spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); mem_start += spapr->rma_size; node_size -= spapr->rma_size; } @@ -720,9 +721,7 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) int ret, i, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; - uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; - uint32_t nr_lmbs = machine->maxram_size/lmb_size - nr_rma_lmbs; - uint32_t nr_assigned_lmbs = machine->ram_size/lmb_size - nr_rma_lmbs; + uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; @@ -755,15 +754,9 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) for (i = 0; i < nr_lmbs; i++) { sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; - uint64_t addr; + uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;; uint32_t *dynamic_memory = cur_index; - if (i < nr_assigned_lmbs) { - addr = (i + nr_rma_lmbs) * lmb_size; - } else { - addr = (i - nr_assigned_lmbs) * lmb_size + - spapr->hotplug_memory.base; - } drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size); g_assert(drc); @@ -838,8 +831,6 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, /* Generate memory nodes or ibm,dynamic-reconfiguration-memory node */ if (memory_update && smc->dr_lmb_enabled) { _FDT((spapr_populate_drconf_memory(spapr, fdt))); - } else { - _FDT((spapr_populate_memory(spapr, fdt))); } /* Pack resulting tree */ @@ -877,23 +868,10 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, /* open out the base tree into a temp buffer for the final tweaks */ _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); - /* - * Add memory@0 node to represent RMA. Rest of the memory is either - * represented by memory nodes or ibm,dynamic-reconfiguration-memory - * node later during ibm,client-architecture-support call. - * - * If NUMA is configured, ensure that memory@0 ends up in the - * first memory-less node. - */ - if (nb_numa_nodes) { - for (i = 0; i < nb_numa_nodes; ++i) { - if (numa_info[i].node_mem) { - spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); - break; - } - } - } else { - spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); + ret = spapr_populate_memory(spapr, fdt); + if (ret < 0) { + fprintf(stderr, "couldn't setup memory nodes in fdt\n"); + exit(1); } ret = spapr_populate_vdevice(spapr->vio_bus, fdt); @@ -1600,21 +1578,14 @@ static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr) { MachineState *machine = MACHINE(spapr); uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; - uint32_t nr_rma_lmbs = spapr->rma_size/lmb_size; - uint32_t nr_lmbs = machine->maxram_size/lmb_size - nr_rma_lmbs; - uint32_t nr_assigned_lmbs = machine->ram_size/lmb_size - nr_rma_lmbs; + uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; int i; for (i = 0; i < nr_lmbs; i++) { sPAPRDRConnector *drc; uint64_t addr; - if (i < nr_assigned_lmbs) { - addr = (i + nr_rma_lmbs) * lmb_size; - } else { - addr = (i - nr_assigned_lmbs) * lmb_size + - spapr->hotplug_memory.base; - } + addr = i * lmb_size + spapr->hotplug_memory.base; drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size); qemu_register_reset(spapr_drc_reset, drc); -- cgit v1.2.3 From 7a36ae7a9f4f136d40fe1da4aab66b364d4aa56d Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 3 Aug 2015 11:05:42 +0530 Subject: spapr: Support hotplug by specifying DRC count Support hotplug identifier type RTAS_LOG_V6_HP_ID_DRC_COUNT that allows hotplugging of DRCs by specifying the DRC count. While we are here, rename spapr_hotplug_req_add_event() to spapr_hotplug_req_add_by_index() spapr_hotplug_req_remove_event() to spapr_hotplug_req_remove_by_index() so that they match with spapr_hotplug_req_add_by_count(). Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 +- hw/ppc/spapr_events.c | 47 ++++++++++++++++++++++++++++++++++++++--------- hw/ppc/spapr_pci.c | 4 ++-- 3 files changed, 41 insertions(+), 12 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 402c00d24e..d00513fc34 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2062,7 +2062,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); - spapr_hotplug_req_add_event(drc); + spapr_hotplug_req_add_by_index(drc); addr += SPAPR_MEMORY_BLOCK_SIZE; } } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 98bf7aedfc..744ea62ac0 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -386,7 +386,9 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } -static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) +static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + sPAPRDRConnectorType drc_type, + uint32_t drc) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; @@ -395,8 +397,6 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) struct rtas_event_log_v6_maina *maina; struct rtas_event_log_v6_mainb *mainb; struct rtas_event_log_v6_hp *hp; - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - sPAPRDRConnectorType drc_type = drck->get_type(drc); new_hp = g_malloc0(sizeof(struct hp_log_full)); hdr = &new_hp->hdr; @@ -427,8 +427,7 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); hp->hdr.section_version = 1; /* includes extended modifier */ hp->hotplug_action = hp_action; - hp->drc.index = cpu_to_be32(drck->get_index(drc)); - hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; + hp->hotplug_identifier = hp_id; switch (drc_type) { case SPAPR_DR_CONNECTOR_TYPE_PCI: @@ -445,19 +444,49 @@ static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action) return; } + if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { + hp->drc.count = cpu_to_be32(drc); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { + hp->drc.index = cpu_to_be32(drc); + } + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); } -void spapr_hotplug_req_add_event(sPAPRDRConnector *drc) +void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sPAPRDRConnectorType drc_type = drck->get_type(drc); + uint32 index = drck->get_index(drc); + + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); +} + +void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) +{ + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sPAPRDRConnectorType drc_type = drck->get_type(drc); + uint32 index = drck->get_index(drc); + + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); +} + +void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, + uint32_t count) { - spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_ADD); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); } -void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc) +void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, + uint32_t count) { - spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); } static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index d2188c854d..b088491c9e 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1187,7 +1187,7 @@ static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler, return; } if (plugged_dev->hotplugged) { - spapr_hotplug_req_add_event(drc); + spapr_hotplug_req_add_by_index(drc); } } @@ -1215,7 +1215,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler, error_propagate(errp, local_err); return; } - spapr_hotplug_req_remove_event(drc); + spapr_hotplug_req_remove_by_index(drc); } } -- cgit v1.2.3 From 0a4178692c2375a4516da7b71629bd08ee8697ee Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 3 Aug 2015 11:05:43 +0530 Subject: spapr: Move memory hotplug to RTAS_LOG_V6_HP_ID_DRC_COUNT type Till now memory hotplug used RTAS_LOG_V6_HP_ID_DRC_INDEX hotplug type which meant that we generated one hotplug type of EPOW event for every 256MB (SPAPR_MEMORY_BLOCK_SIZE). This quickly overruns the kernel rtas log buffer thus resulting in loss of memory hotplug events. Switch to RTAS_LOG_V6_HP_ID_DRC_COUNT hotplug type for memory so that we generate only one event per hotplug request. Signed-off-by: Bharata B Rao Reviewed-by: Michael Roth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index d00513fc34..533c7adae1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2062,9 +2062,9 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); - spapr_hotplug_req_add_by_index(drc); addr += SPAPR_MEMORY_BLOCK_SIZE; } + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); } static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, -- cgit v1.2.3 From 20bb648dca6d7fe8cdd1941194e7851950b25dc5 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 8 Sep 2015 11:21:52 +1000 Subject: spapr: Fix default NUMA node allocation for threads At present, if guest numa nodes are requested, but the cpus in each node are not specified, spapr just uses the default behaviour or assigning each vcpu round-robin to nodes. If smp_threads != 1, that will assign adjacent threads in a core to different NUMA nodes. As well as being just weird, that's a configuration that can't be represented in the device tree we give to the guest, which means the guest and qemu end up with different ideas of the NUMA topology. This patch implements mc->cpu_index_to_socket_id in the spapr code to make sure vcpus get assigned to nodes only at the socket granularity. Signed-off-by: David Gibson Reviewed-by: Alexey Kardashevskiy --- hw/ppc/spapr.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 533c7adae1..59fc814640 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2162,6 +2162,13 @@ static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, return NULL; } +static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) +{ + /* Allocate to NUMA nodes on a "socket" basis (not that concept of + * socket means much for the paravirtualized PAPR platform) */ + return cpu_index / smp_threads / smp_cores; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2183,6 +2190,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->get_hotplug_handler = spapr_get_hotpug_handler; hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; + mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; smc->dr_lmb_enabled = false; fwc->get_dev_path = spapr_get_fw_dev_path; -- cgit v1.2.3 From ef001f069e0f175a036929782c5c63053df9569a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 15 Sep 2015 21:34:20 +0200 Subject: ppc/spapr: Fix buffer overflow in spapr_populate_drconf_memory() The buffer that is allocated in spapr_populate_drconf_memory() is used for setting both, the "ibm,dynamic-memory" and the "ibm,associativity-lookup-arrays" property. However, only the size of the first one is taken into account when allocating the memory. So if the length of the second property is larger than the length of the first one, we run into a buffer overflow here! Fix it by taking the length of the second property into account, too. Fixes: "spapr: Support ibm,dynamic-reconfiguration-memory" patch Signed-off-by: Thomas Huth Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 59fc814640..1333375846 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -725,9 +725,12 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; - /* Allocate enough buffer size to fit in ibm,dynamic-memory */ - buf_len = nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE * sizeof(uint32_t) + - sizeof(uint32_t); + /* + * Allocate enough buffer size to fit in ibm,dynamic-memory + * or ibm,associativity-lookup-arrays + */ + buf_len = MAX(nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1, nr_nodes * 4 + 2) + * sizeof(uint32_t); cur_index = int_buf = g_malloc0(buf_len); offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); -- cgit v1.2.3 From 4d9392be6c1aada69ce86c0f6584128976985394 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 17 Sep 2015 10:49:41 +0200 Subject: ppc/spapr: Implement H_RANDOM hypercall in QEMU The PAPR interface defines a hypercall to pass high-quality hardware generated random numbers to guests. Recent kernels can already provide this hypercall to the guest if the right hardware random number generator is available. But in case the user wants to use another source like EGD, or QEMU is running with an older kernel, we should also have this call in QEMU, so that guests that do not support virtio-rng yet can get good random numbers, too. This patch now adds a new pseudo-device to QEMU that either directly provides this hypercall to the guest or is able to enable the in-kernel hypercall if available. The in-kernel hypercall can be enabled with the use-kvm property, e.g.: qemu-system-ppc64 -device spapr-rng,use-kvm=true For handling the hypercall in QEMU instead, a "RngBackend" is required since the hypercall should provide "good" random data instead of pseudo-random (like from a "simple" library function like rand() or g_random_int()). Since there are multiple RngBackends available, the user must select an appropriate back-end via the "rng" property of the device, e.g.: qemu-system-ppc64 -object rng-random,filename=/dev/hwrng,id=gid0 \ -device spapr-rng,rng=gid0 ... See http://wiki.qemu-project.org/Features-Done/VirtIORNG for other example of specifying RngBackends. Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- hw/ppc/Makefile.objs | 2 +- hw/ppc/spapr.c | 8 +++ hw/ppc/spapr_rng.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 hw/ppc/spapr_rng.c (limited to 'hw') diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index c8ab06e7f3..c1ffc7771b 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o # IBM pSeries (sPAPR) obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o -obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o +obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1333375846..7f4f196e53 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -883,6 +883,14 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, exit(1); } + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { + ret = spapr_rng_populate_dt(fdt); + if (ret < 0) { + fprintf(stderr, "could not set up rng device in the fdt\n"); + exit(1); + } + } + QLIST_FOREACH(phb, &spapr->phbs, list) { ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); } diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c new file mode 100644 index 0000000000..ed43d5e042 --- /dev/null +++ b/hw/ppc/spapr_rng.c @@ -0,0 +1,186 @@ +/* + * QEMU sPAPR random number generator "device" for H_RANDOM hypercall + * + * Copyright 2015 Thomas Huth, Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" +#include "sysemu/device_tree.h" +#include "sysemu/rng.h" +#include "hw/ppc/spapr.h" +#include "kvm_ppc.h" + +#define SPAPR_RNG(obj) \ + OBJECT_CHECK(sPAPRRngState, (obj), TYPE_SPAPR_RNG) + +struct sPAPRRngState { + /*< private >*/ + DeviceState ds; + RngBackend *backend; + bool use_kvm; +}; +typedef struct sPAPRRngState sPAPRRngState; + +struct HRandomData { + QemuSemaphore sem; + union { + uint64_t v64; + uint8_t v8[8]; + } val; + int received; +}; +typedef struct HRandomData HRandomData; + +/* Callback function for the RngBackend */ +static void random_recv(void *dest, const void *src, size_t size) +{ + HRandomData *hrdp = dest; + + if (src && size > 0) { + assert(size + hrdp->received <= sizeof(hrdp->val.v8)); + memcpy(&hrdp->val.v8[hrdp->received], src, size); + hrdp->received += size; + } + + qemu_sem_post(&hrdp->sem); +} + +/* Handler for the H_RANDOM hypercall */ +static target_ulong h_random(PowerPCCPU *cpu, sPAPRMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + sPAPRRngState *rngstate; + HRandomData hrdata; + + rngstate = SPAPR_RNG(object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)); + + if (!rngstate || !rngstate->backend) { + return H_HARDWARE; + } + + qemu_sem_init(&hrdata.sem, 0); + hrdata.val.v64 = 0; + hrdata.received = 0; + + qemu_mutex_unlock_iothread(); + while (hrdata.received < 8) { + rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, + random_recv, &hrdata); + qemu_sem_wait(&hrdata.sem); + } + qemu_mutex_lock_iothread(); + + qemu_sem_destroy(&hrdata.sem); + args[0] = hrdata.val.v64; + + return H_SUCCESS; +} + +static void spapr_rng_instance_init(Object *obj) +{ + sPAPRRngState *rngstate = SPAPR_RNG(obj); + + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL) != NULL) { + error_report("spapr-rng can not be instantiated twice!"); + return; + } + + object_property_add_link(obj, "rng", TYPE_RNG_BACKEND, + (Object **)&rngstate->backend, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); + object_property_set_description(obj, "rng", + "ID of the random number generator backend", + NULL); +} + +static void spapr_rng_realize(DeviceState *dev, Error **errp) +{ + + sPAPRRngState *rngstate = SPAPR_RNG(dev); + + if (rngstate->use_kvm) { + if (kvmppc_enable_hwrng() == 0) { + return; + } + /* + * If user specified both, use-kvm and a backend, we fall back to + * the backend now. If not, provide an appropriate error message. + */ + if (!rngstate->backend) { + error_setg(errp, "Could not initialize in-kernel H_RANDOM call!"); + return; + } + } + + if (rngstate->backend) { + spapr_register_hypercall(H_RANDOM, h_random); + } else { + error_setg(errp, "spapr-rng needs an RNG backend!"); + } +} + +int spapr_rng_populate_dt(void *fdt) +{ + int node; + int ret; + + node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities"); + if (node <= 0) { + return -1; + } + ret = fdt_setprop_string(fdt, node, "device_type", + "ibm,platform-facilities"); + ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1); + ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0); + + node = fdt_add_subnode(fdt, node, "ibm,random-v1"); + if (node <= 0) { + return -1; + } + ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random"); + + return ret ? -1 : 0; +} + +static Property spapr_rng_properties[] = { + DEFINE_PROP_BOOL("use-kvm", sPAPRRngState, use_kvm, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_rng_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = spapr_rng_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = spapr_rng_properties; +} + +static const TypeInfo spapr_rng_info = { + .name = TYPE_SPAPR_RNG, + .parent = TYPE_DEVICE, + .instance_size = sizeof(sPAPRRngState), + .instance_init = spapr_rng_instance_init, + .class_init = spapr_rng_class_init, +}; + +static void spapr_rng_register_type(void) +{ + type_register_static(&spapr_rng_info); +} +type_init(spapr_rng_register_type) -- cgit v1.2.3 From 47445c80fb57035331574ac1ac0bcee67fb84aeb Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 18 Sep 2015 17:30:43 +1000 Subject: sPAPR: Revert don't enable EEH on emulated PCI devices This reverts commit 7cb18007 ("sPAPR: Don't enable EEH on emulated PCI devices") as rtas_ibm_set_eeh_option() isn't the right place to check if there has the corresponding PCI device for the input address, which can be PE address, not PCI device address. Signed-off-by: Gavin Shan Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index b088491c9e..617b7f3fdd 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -431,7 +431,6 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, { sPAPRPHBState *sphb; sPAPRPHBClass *spc; - PCIDevice *pdev; uint32_t addr, option; uint64_t buid; int ret; @@ -449,12 +448,6 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, goto param_error_exit; } - pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus, - (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { - goto param_error_exit; - } - spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); if (!spc->eeh_set_option) { goto param_error_exit; -- cgit v1.2.3 From d76548a98f4e18d3c65a3d921bbb70caf9be6138 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 18 Sep 2015 17:30:44 +1000 Subject: sPAPR: Enable EEH on VFIO PCI device only This checks if the PCI device retrieved from the PCI device address is VFIO PCI device when enabling EEH functionality. If it's not VFIO PCI device, the EEH functonality isn't enabled. Signed-off-by: Gavin Shan Signed-off-by: David Gibson --- hw/ppc/spapr_pci_vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index cca45ed312..a61b418131 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -117,7 +117,7 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, phb = PCI_HOST_BRIDGE(sphb); pdev = pci_find_device(phb->bus, (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev) { + if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { return RTAS_OUT_PARAM_ERROR; } -- cgit v1.2.3