diff options
author | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-11-30 14:11:48 +0900 |
---|---|---|
committer | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-11-30 14:11:58 +0900 |
commit | 2371603ef207b015cc29728296b21cb1722d8ae6 (patch) | |
tree | 25cbb62daf29066fa874b9a4b8f754bc18187806 /hw/ppc | |
parent | d5df306aedcc13cf5a2463ddf858c8d718a788d5 (diff) | |
parent | d4dcb59384ab4433702f015fdddda1eff8e3927f (diff) | |
download | qemu-2371603ef207b015cc29728296b21cb1722d8ae6.tar.gz qemu-2371603ef207b015cc29728296b21cb1722d8ae6.tar.bz2 qemu-2371603ef207b015cc29728296b21cb1722d8ae6.zip |
Merge branch 'develop_qemu_2.7' into develop
Change-Id: Ibae70e3c0d1d88632903f98be114f47af9ca7502
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Diffstat (limited to 'hw/ppc')
-rw-r--r-- | hw/ppc/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/ppc/e500.c | 2 | ||||
-rw-r--r-- | hw/ppc/e500.h | 2 | ||||
-rw-r--r-- | hw/ppc/e500plat.c | 1 | ||||
-rw-r--r-- | hw/ppc/mac.h | 7 | ||||
-rw-r--r-- | hw/ppc/mac_newworld.c | 1 | ||||
-rw-r--r-- | hw/ppc/mac_oldworld.c | 2 | ||||
-rw-r--r-- | hw/ppc/ppc.c | 65 | ||||
-rw-r--r-- | hw/ppc/ppc405.h | 6 | ||||
-rw-r--r-- | hw/ppc/ppc4xx_devs.c | 1 | ||||
-rw-r--r-- | hw/ppc/ppce500_spin.c | 9 | ||||
-rw-r--r-- | hw/ppc/prep.c | 3 | ||||
-rw-r--r-- | hw/ppc/spapr.c | 373 | ||||
-rw-r--r-- | hw/ppc/spapr_cpu_core.c | 432 | ||||
-rw-r--r-- | hw/ppc/spapr_drc.c | 25 | ||||
-rw-r--r-- | hw/ppc/spapr_events.c | 11 | ||||
-rw-r--r-- | hw/ppc/spapr_hcall.c | 90 | ||||
-rw-r--r-- | hw/ppc/spapr_iommu.c | 211 | ||||
-rw-r--r-- | hw/ppc/spapr_pci.c | 123 | ||||
-rw-r--r-- | hw/ppc/spapr_pci_vfio.c | 7 | ||||
-rw-r--r-- | hw/ppc/spapr_rtas.c | 26 | ||||
-rw-r--r-- | hw/ppc/spapr_rtas_ddw.c | 295 | ||||
-rw-r--r-- | hw/ppc/spapr_vio.c | 18 | ||||
-rw-r--r-- | hw/ppc/trace-events | 43 | ||||
-rw-r--r-- | hw/ppc/virtex_ml507.c | 1 |
25 files changed, 1483 insertions, 273 deletions
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index c1ffc7771b..91a3420f47 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -4,9 +4,11 @@ obj-y += ppc.o ppc_booke.o obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o # PowerPC 4xx boards obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o obj-y += ppc4xx_pci.o diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index ee1c60b820..0cd534df55 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -601,7 +601,7 @@ static int ppce500_prep_device_tree(MachineState *machine, } /* Create -kernel TLB entries for BookE. */ -static inline hwaddr booke206_page_size_to_tlb(uint64_t size) +hwaddr booke206_page_size_to_tlb(uint64_t size) { return 63 - clz64(size >> 10); } diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h index ef224ea5e6..70ba1d8f4f 100644 --- a/hw/ppc/e500.h +++ b/hw/ppc/e500.h @@ -26,4 +26,6 @@ typedef struct PPCE500Params { void ppce500_init(MachineState *machine, PPCE500Params *params); +hwaddr booke206_page_size_to_tlb(uint64_t size); + #endif diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c index b00565c3d3..94b454551f 100644 --- a/hw/ppc/e500plat.c +++ b/hw/ppc/e500plat.c @@ -14,6 +14,7 @@ #include "e500.h" #include "hw/boards.h" #include "sysemu/device_tree.h" +#include "sysemu/kvm.h" #include "hw/pci/pci.h" #include "hw/ppc/openpic.h" #include "kvm_ppc.h" diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h index 5764b86c28..20cbddb4e4 100644 --- a/hw/ppc/mac.h +++ b/hw/ppc/mac.h @@ -22,8 +22,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#if !defined(__PPC_MAC_H__) -#define __PPC_MAC_H__ + +#ifndef PPC_MAC_H +#define PPC_MAC_H #include "exec/memory.h" #include "hw/sysbus.h" @@ -184,4 +185,4 @@ typedef struct MacIONVRAMState { } MacIONVRAMState; void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len); -#endif /* !defined(__PPC_MAC_H__) */ +#endif /* PPC_MAC_H */ diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 32e88b3786..7d2510658d 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -380,6 +380,7 @@ static void ppc_core99_init(MachineState *machine) pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io()); machine_arch = ARCH_MAC99; } + object_property_set_bool(OBJECT(pci_bus), true, "realized", &error_abort); machine->usb |= defaults_enabled() && !machine->usb_disabled; diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index a9bb1c27df..447948746b 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -309,7 +309,7 @@ static void ppc_heathrow_init(MachineState *machine) dev = qdev_create(adb_bus, TYPE_ADB_MOUSE); qdev_init_nofail(dev); - if (usb_enabled()) { + if (machine_usb(machine)) { pci_create_simple(pci_bus, -1, "pci-ohci"); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 38ff2e1596..8945869009 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -33,6 +33,7 @@ #include "hw/timer/m48t59.h" #include "qemu/log.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "hw/loader.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" @@ -164,9 +165,9 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) } } -void ppc6xx_irq_init(CPUPPCState *env) +void ppc6xx_irq_init(PowerPCCPU *cpu) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUPPCState *env = &cpu->env; env->irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, cpu, PPC6xx_INPUT_NB); @@ -251,9 +252,9 @@ static void ppc970_set_irq(void *opaque, int pin, int level) } } -void ppc970_irq_init(CPUPPCState *env) +void ppc970_irq_init(PowerPCCPU *cpu) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUPPCState *env = &cpu->env; env->irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, cpu, PPC970_INPUT_NB); @@ -287,9 +288,9 @@ static void power7_set_irq(void *opaque, int pin, int level) } } -void ppcPOWER7_irq_init(CPUPPCState *env) +void ppcPOWER7_irq_init(PowerPCCPU *cpu) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUPPCState *env = &cpu->env; env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu, POWER7_INPUT_NB); @@ -372,9 +373,9 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) } } -void ppc40x_irq_init(CPUPPCState *env) +void ppc40x_irq_init(PowerPCCPU *cpu) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUPPCState *env = &cpu->env; env->irq_inputs = (void **)qemu_allocate_irqs(&ppc40x_set_irq, cpu, PPC40x_INPUT_NB); @@ -436,9 +437,9 @@ static void ppce500_set_irq(void *opaque, int pin, int level) } } -void ppce500_irq_init(CPUPPCState *env) +void ppce500_irq_init(PowerPCCPU *cpu) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUPPCState *env = &cpu->env; env->irq_inputs = (void **)qemu_allocate_irqs(&ppce500_set_irq, cpu, PPCE500_INPUT_NB); @@ -699,9 +700,18 @@ static inline void cpu_ppc_decr_lower(PowerPCCPU *cpu) static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) { + CPUPPCState *env = &cpu->env; + /* Raise it */ - LOG_TB("raise decrementer exception\n"); - ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1); + LOG_TB("raise hv decrementer exception\n"); + + /* The architecture specifies that we don't deliver HDEC + * interrupts in a PM state. Not only they don't cause a + * wakeup but they also get effectively discarded. + */ + if (!env->in_pm_state) { + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1); + } } static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu) @@ -880,7 +890,7 @@ static int timebase_post_load(void *opaque, int version_id) host_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); ns_diff = MAX(0, host_ns - tb_remote->time_of_the_day_ns); migration_duration_ns = MIN(NANOSECONDS_PER_SECOND, ns_diff); - migration_duration_tb = muldiv64(migration_duration_ns, freq, + migration_duration_tb = muldiv64(freq, migration_duration_ns, NANOSECONDS_PER_SECOND); guest_tb = tb_remote->guest_timebase + MIN(0, migration_duration_tb); @@ -928,9 +938,7 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) } /* Create new timer */ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu); - if (0) { - /* XXX: find a suitable condition to enable the hypervisor decrementer - */ + if (env->has_hv_mode) { tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, cpu); } else { @@ -1343,3 +1351,28 @@ PowerPCCPU *ppc_get_vcpu_by_dt_id(int cpu_dt_id) return NULL; } + +void ppc_cpu_parse_features(const char *cpu_model) +{ + CPUClass *cc; + ObjectClass *oc; + const char *typename; + gchar **model_pieces; + + model_pieces = g_strsplit(cpu_model, ",", 2); + if (!model_pieces[0]) { + error_report("Invalid/empty CPU model name"); + exit(1); + } + + oc = cpu_class_by_name(TYPE_POWERPC_CPU, model_pieces[0]); + if (oc == NULL) { + error_report("Unable to find CPU definition: %s", model_pieces[0]); + exit(1); + } + + typename = object_class_get_name(oc); + cc = CPU_CLASS(oc); + cc->parse_features(typename, model_pieces[1], &error_fatal); + g_strfreev(model_pieces); +} diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h index 1c5f04fae1..c67febca2f 100644 --- a/hw/ppc/ppc405.h +++ b/hw/ppc/ppc405.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#if !defined(PPC_405_H) -#define PPC_405_H +#ifndef PPC405_H +#define PPC405_H #include "hw/ppc/ppc4xx.h" @@ -78,4 +78,4 @@ CPUPPCState *ppc_stb025_init (MemoryRegion ram_memories[2], uint32_t sysclk, qemu_irq **picp, ram_addr_t *offsetp); -#endif /* !defined(PPC_405_H) */ +#endif /* PPC405_H */ diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c index 7d59018fc2..e7f413e49d 100644 --- a/hw/ppc/ppc4xx_devs.c +++ b/hw/ppc/ppc4xx_devs.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "cpu.h" #include "hw/hw.h" #include "hw/ppc/ppc.h" #include "hw/ppc/ppc4xx.h" diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c index 76bd78bfd7..22c584eb8d 100644 --- a/hw/ppc/ppce500_spin.c +++ b/hw/ppc/ppce500_spin.c @@ -32,6 +32,7 @@ #include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "sysemu/kvm.h" +#include "e500.h" #define MAX_CPUS 32 @@ -72,12 +73,6 @@ static void spin_reset(void *opaque) } } -/* Create -kernel TLB entries for BookE, linearly spanning 256MB. */ -static inline hwaddr booke206_page_size_to_tlb(uint64_t size) -{ - return ctz32(size >> 10) >> 1; -} - static void mmubooke_create_initial_mapping(CPUPPCState *env, target_ulong va, hwaddr pa, @@ -104,7 +99,7 @@ static void spin_kick(void *data) hwaddr map_start; cpu_synchronize_state(cpu); - stl_p(&curspin->pir, env->spr[SPR_PIR]); + stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]); env->nip = ldq_p(&curspin->addr) & (map_size - 1); env->gpr[3] = ldq_p(&curspin->r3); env->gpr[4] = 0; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 3ffb85e601..054af1e8b4 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "cpu.h" #include "hw/hw.h" #include "hw/timer/m48t59.h" #include "hw/i386/pc.h" @@ -648,7 +649,7 @@ static void ppc_prep_init(MachineState *machine) memory_region_add_subregion(sysmem, 0xFEFF0000, xcsr); #endif - if (usb_enabled()) { + if (machine_usb(machine)) { pci_create_simple(pci_bus, -1, "pci-ohci"); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b69995e0dc..30d6800ab3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -29,6 +29,7 @@ #include "sysemu/sysemu.h" #include "sysemu/numa.h" #include "hw/hw.h" +#include "qemu/log.h" #include "hw/fw-path-provider.h" #include "elf.h" #include "net/net.h" @@ -65,6 +66,8 @@ #include "hw/compat.h" #include "qemu/cutils.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "qmp-commands.h" #include <libfdt.h> @@ -88,8 +91,6 @@ #define MIN_RMA_SLOF 128UL -#define TIMEBASE_FREQ 512000000ULL - #define PHANDLE_XICP 0x00001111 #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) @@ -115,15 +116,16 @@ static XICSState *try_create_xics(const char *type, int nr_servers, static XICSState *xics_system_init(MachineState *machine, int nr_servers, int nr_irqs, Error **errp) { - XICSState *icp = NULL; + XICSState *xics = NULL; if (kvm_enabled()) { Error *err = NULL; if (machine_kernel_irqchip_allowed(machine)) { - icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err); + xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, + &err); } - if (machine_kernel_irqchip_required(machine) && !icp) { + if (machine_kernel_irqchip_required(machine) && !xics) { error_reportf_err(err, "kernel_irqchip requested but unavailable: "); } else { @@ -131,11 +133,11 @@ static XICSState *xics_system_init(MachineState *machine, } } - if (!icp) { - icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, errp); + if (!xics) { + xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp); } - return icp; + return xics; } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -338,6 +340,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, add_str(hypertas, "hcall-splpar"); add_str(hypertas, "hcall-bulk"); add_str(hypertas, "hcall-set-mode"); + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); add_str(qemu_hypertas, "hcall-memop1"); fdt = g_malloc0(FDT_MAX_SIZE); @@ -598,12 +603,23 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, int index = ppc_get_vcpu_dt_id(cpu); uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), 0xffffffff, 0xffffffff}; - uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() + : SPAPR_TIMEBASE_FREQ; uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; uint32_t page_sizes_prop[64]; size_t page_sizes_prop_size; uint32_t vcpus_per_socket = smp_threads * smp_cores; uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + int drc_index; + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index); + if (drc) { + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drc_index = drck->get_index(drc); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); + } /* Note: we keep CI large pages off for now because a 64K capable guest * provisioned with large pages might otherwise try to map a qemu @@ -761,14 +777,17 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) int ret, i, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; - uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; + uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size; + uint32_t nr_lmbs = (spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr)) / + lmb_size; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; /* - * Don't create the node if there are no DR LMBs. + * Don't create the node if there is no hotpluggable memory */ - if (!nr_lmbs) { + if (machine->ram_size == machine->maxram_size) { return 0; } @@ -802,26 +821,40 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) int_buf[0] = cpu_to_be32(nr_lmbs); cur_index++; for (i = 0; i < nr_lmbs; i++) { - sPAPRDRConnector *drc; - sPAPRDRConnectorClass *drck; - uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;; + uint64_t addr = i * lmb_size; uint32_t *dynamic_memory = cur_index; - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr/lmb_size); - g_assert(drc); - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - - dynamic_memory[0] = cpu_to_be32(addr >> 32); - dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); - dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); - dynamic_memory[3] = cpu_to_be32(0); /* reserved */ - dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); - if (addr < machine->ram_size || - memory_region_present(get_system_memory(), addr)) { - dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + if (i >= hotplug_lmb_start) { + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, i); + g_assert(drc); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(drck->get_index(drc)); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); + if (memory_region_present(get_system_memory(), addr)) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } else { + dynamic_memory[5] = cpu_to_be32(0); + } } else { - dynamic_memory[5] = cpu_to_be32(0); + /* + * LMB information for RMA, boot time RAM and gap b/n RAM and + * hotplug memory region -- all these are marked as reserved + * and as having no valid DRC. + */ + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(0); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(-1); + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED | + SPAPR_LMB_FLAGS_DRC_INVALID); } cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; @@ -905,6 +938,7 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, hwaddr rtas_size) { MachineState *machine = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *boot_device = machine->boot_order; int ret, i; @@ -987,6 +1021,16 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); } + if (mc->query_hotpluggable_cpus) { + int offset = fdt_path_offset(fdt, "/cpus"); + ret = spapr_drc_populate_dt(fdt, offset, NULL, + SPAPR_DR_CONNECTOR_TYPE_CPU); + if (ret < 0) { + error_report("Couldn't set up CPU DR device tree properties"); + exit(1); + } + } + _FDT((fdt_pack(fdt))); if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { @@ -1180,26 +1224,6 @@ static void ppc_spapr_reset(void) } -static void spapr_cpu_reset(void *opaque) -{ - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - PowerPCCPU *cpu = opaque; - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - - cpu_reset(cs); - - /* All CPUs start halted. CPU0 is unhalted from the machine level - * reset code and the rest are explicitly started up by the guest - * using an RTAS call */ - cs->halted = 1; - - env->spr[SPR_HIOR] = 0; - - ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, - &error_fatal); -} - static void spapr_create_nvram(sPAPRMachineState *spapr) { DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram"); @@ -1489,7 +1513,6 @@ static int htab_save_complete(QEMUFile *f, void *opaque) if (rc < 0) { return rc; } - close_htab_fd(spapr); } else { if (spapr->htab_first_pass) { htab_save_first_pass(f, spapr, -1); @@ -1591,10 +1614,18 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id) return 0; } +static void htab_cleanup(void *opaque) +{ + sPAPRMachineState *spapr = opaque; + + close_htab_fd(spapr); +} + static SaveVMHandlers savevm_htab_handlers = { .save_live_setup = htab_save_setup, .save_live_iterate = htab_save_iterate, .save_live_complete_precopy = htab_save_complete, + .cleanup = htab_cleanup, .load_state = htab_load, }; @@ -1605,32 +1636,6 @@ static void spapr_boot_set(void *opaque, const char *boot_device, machine->boot_order = g_strdup(boot_device); } -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - - /* Set time-base frequency to 512 MHz */ - cpu_ppc_tb_init(env, TIMEBASE_FREQ); - - /* Enable PAPR mode in TCG or KVM */ - cpu_ppc_set_papr(cpu); - - if (cpu->max_compat) { - Error *local_err = NULL; - - ppc_set_compat(cpu, cpu->max_compat, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - - xics_cpu_setup(spapr->icp, cpu); - - qemu_register_reset(spapr_cpu_reset, cpu); -} - /* * Reset routine for LMB DR devices. * @@ -1708,11 +1713,11 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp) static void ppc_spapr_init(MachineState *machine) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; - PowerPCCPU *cpu; PCIHostState *phb; int i; MemoryRegion *sysmem = get_system_memory(); @@ -1726,6 +1731,22 @@ static void ppc_spapr_init(MachineState *machine) long load_limit, fw_size; bool kernel_le = false; char *filename; + int smt = kvmppc_smt_threads(); + int spapr_cores = smp_cpus / smp_threads; + int spapr_max_cores = max_cpus / smp_threads; + + if (mc->query_hotpluggable_cpus) { + if (smp_cpus % smp_threads) { + error_report("smp_cpus (%u) must be multiple of threads (%u)", + smp_cpus, smp_threads); + exit(1); + } + if (max_cpus % smp_threads) { + error_report("max_cpus (%u) must be multiple of threads (%u)", + max_cpus, smp_threads); + exit(1); + } + } msi_nonbroken = true; @@ -1759,6 +1780,13 @@ static void ppc_spapr_init(MachineState *machine) spapr->vrma_adjust = 1; spapr->rma_size = MIN(spapr->rma_size, 0x10000000); } + + /* Actually we don't support unbounded RMA anymore since we + * added proper emulation of HV mode. The max we can get is + * 16G which also happens to be what we configure for PAPR + * mode so make sure we don't do anything bigger than that + */ + spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull); } if (spapr->rma_size > node0_size) { @@ -1771,10 +1799,9 @@ static void ppc_spapr_init(MachineState *machine) load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; /* Set up Interrupt Controller before we create the VCPUs */ - spapr->icp = xics_system_init(machine, - DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), - smp_threads), - XICS_IRQS, &error_fatal); + spapr->xics = xics_system_init(machine, + DIV_ROUND_UP(max_cpus * smt, smp_threads), + XICS_IRQS_SPAPR, &error_fatal); if (smc->dr_lmb_enabled) { spapr_validate_node_memory(machine, &error_fatal); @@ -1784,13 +1811,46 @@ static void ppc_spapr_init(MachineState *machine) if (machine->cpu_model == NULL) { machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; } - for (i = 0; i < smp_cpus; i++) { - cpu = cpu_ppc_init(machine->cpu_model); - if (cpu == NULL) { - error_report("Unable to find PowerPC CPU definition"); + + ppc_cpu_parse_features(machine->cpu_model); + + if (mc->query_hotpluggable_cpus) { + char *type = spapr_get_cpu_core_type(machine->cpu_model); + + if (type == NULL) { + error_report("Unable to find sPAPR CPU Core definition"); exit(1); } - spapr_cpu_init(spapr, cpu, &error_fatal); + + spapr->cores = g_new0(Object *, spapr_max_cores); + for (i = 0; i < spapr_max_cores; i++) { + int core_id = i * smp_threads; + sPAPRDRConnector *drc = + spapr_dr_connector_new(OBJECT(spapr), + SPAPR_DR_CONNECTOR_TYPE_CPU, + (core_id / smp_threads) * smt); + + qemu_register_reset(spapr_drc_reset, drc); + + if (i < spapr_cores) { + Object *core = object_new(type); + object_property_set_int(core, smp_threads, "nr-threads", + &error_fatal); + object_property_set_int(core, core_id, CPU_CORE_PROP_CORE_ID, + &error_fatal); + object_property_set_bool(core, true, "realized", &error_fatal); + } + } + g_free(type); + } else { + for (i = 0; i < smp_cpus; i++) { + PowerPCCPU *cpu = cpu_ppc_init(machine->cpu_model); + if (cpu == NULL) { + error_report("Unable to find PowerPC CPU definition"); + exit(1); + } + spapr_cpu_init(spapr, cpu, &error_fatal); + } } if (kvm_enabled()) { @@ -1815,11 +1875,21 @@ static void ppc_spapr_init(MachineState *machine) /* initialize hotplug memory address space */ if (machine->ram_size < machine->maxram_size) { ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size; + /* + * Limit the number of hotpluggable memory slots to half the number + * slots that KVM supports, leaving the other half for PCI and other + * devices. However ensure that number of slots doesn't drop below 32. + */ + int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 : + SPAPR_MAX_RAM_SLOTS; - if (machine->ram_slots > SPAPR_MAX_RAM_SLOTS) { + if (max_memslots < SPAPR_MAX_RAM_SLOTS) { + max_memslots = SPAPR_MAX_RAM_SLOTS; + } + if (machine->ram_slots > max_memslots) { error_report("Specified number of memory slots %" PRIu64" exceeds max supported %d", - machine->ram_slots, SPAPR_MAX_RAM_SLOTS); + machine->ram_slots, max_memslots); exit(1); } @@ -1841,6 +1911,10 @@ static void ppc_spapr_init(MachineState *machine) exit(1); } spapr->rtas_size = get_image_size(filename); + if (spapr->rtas_size < 0) { + error_report("Could not get size of LPAR rtas '%s'", filename); + exit(1); + } spapr->rtas_blob = g_malloc(spapr->rtas_size); if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) { error_report("Could not load LPAR rtas '%s'", filename); @@ -2131,15 +2205,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, int i, fdt_offset, fdt_size; void *fdt; - /* - * Check for DRC connectors and send hotplug notification to the - * guest only in case of hotplugged memory. This allows cold plugged - * memory to be specified at boot time. - */ - if (!dev->hotplugged) { - return; - } - for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, addr/SPAPR_MEMORY_BLOCK_SIZE); @@ -2153,7 +2218,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; } - spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + /* send hotplug notification to the + * guest only in case of hotplugged memory + */ + if (dev->hotplugged) { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + } } static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -2191,6 +2261,27 @@ out: error_propagate(errp, local_err); } +void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, + sPAPRMachineState *spapr) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int id = ppc_get_vcpu_dt_id(cpu); + void *fdt; + int offset, fdt_size; + char *nodename; + + fdt = create_device_tree(&fdt_size); + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); + offset = fdt_add_subnode(fdt, 0, nodename); + + spapr_populate_cpu_dt(cs, fdt, offset, spapr); + g_free(nodename); + + *fdt_offset = offset; + return fdt; +} + static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2235,21 +2326,40 @@ static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, } spapr_memory_plug(hotplug_dev, dev, node, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_plug(hotplug_dev, dev, errp); } } static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { error_setg(errp, "Memory hot unplug not supported by sPAPR"); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (!mc->query_hotpluggable_cpus) { + error_setg(errp, "CPU hot unplug not supported on this machine"); + return; + } + spapr_core_unplug(hotplug_dev, dev, errp); + } +} + +static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_pre_plug(hotplug_dev, dev, errp); } } static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, DeviceState *dev) { - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { return HOTPLUG_HANDLER(machine); } return NULL; @@ -2262,6 +2372,37 @@ static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) return cpu_index / smp_threads / smp_cores; } +static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine) +{ + int i; + HotpluggableCPUList *head = NULL; + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + int spapr_max_cores = max_cpus / smp_threads; + + for (i = 0; i < spapr_max_cores; i++) { + HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1); + HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); + CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1); + + cpu_item->type = spapr_get_cpu_core_type(machine->cpu_model); + cpu_item->vcpus_count = smp_threads; + cpu_props->has_core_id = true; + cpu_props->core_id = i * smp_threads; + /* TODO: add 'has_node/node' here to describe + to which node core belongs */ + + cpu_item->props = cpu_props; + if (spapr->cores[i]) { + cpu_item->has_qom_path = true; + cpu_item->qom_path = object_get_canonical_path(spapr->cores[i]); + } + list_item->value = cpu_item; + list_item->next = head; + head = list_item; + } + return head; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2288,11 +2429,13 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->has_dynamic_sysbus = true; mc->pci_allow_0_address = true; mc->get_hotplug_handler = spapr_get_hotpug_handler; + hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; smc->dr_lmb_enabled = true; + mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; } @@ -2343,18 +2486,42 @@ static const TypeInfo spapr_machine_info = { type_init(spapr_machine_register_##suffix) /* + * pseries-2.7 + */ +static void spapr_machine_2_7_instance_options(MachineState *machine) +{ +} + +static void spapr_machine_2_7_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(2_7, "2.7", true); + +/* * pseries-2.6 */ +#define SPAPR_COMPAT_2_6 \ + HW_COMPAT_2_6 \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ + .property = "ddw",\ + .value = stringify(off),\ + }, + static void spapr_machine_2_6_instance_options(MachineState *machine) { } static void spapr_machine_2_6_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_2_7_class_options(mc); + mc->query_hotpluggable_cpus = NULL; + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6); } -DEFINE_SPAPR_MACHINE(2_6, "2.6", true); +DEFINE_SPAPR_MACHINE(2_6, "2.6", false); /* * pseries-2.5 @@ -2386,7 +2553,6 @@ DEFINE_SPAPR_MACHINE(2_5, "2.5", false); * pseries-2.4 */ #define SPAPR_COMPAT_2_4 \ - SPAPR_COMPAT_2_5 \ HW_COMPAT_2_4 static void spapr_machine_2_4_instance_options(MachineState *machine) @@ -2409,7 +2575,6 @@ DEFINE_SPAPR_MACHINE(2_4, "2.4", false); * pseries-2.3 */ #define SPAPR_COMPAT_2_3 \ - SPAPR_COMPAT_2_4 \ HW_COMPAT_2_3 \ {\ .driver = "spapr-pci-host-bridge",\ @@ -2437,7 +2602,6 @@ DEFINE_SPAPR_MACHINE(2_3, "2.3", false); */ #define SPAPR_COMPAT_2_2 \ - SPAPR_COMPAT_2_3 \ HW_COMPAT_2_2 \ {\ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ @@ -2462,7 +2626,6 @@ DEFINE_SPAPR_MACHINE(2_2, "2.2", false); * pseries-2.1 */ #define SPAPR_COMPAT_2_1 \ - SPAPR_COMPAT_2_2 \ HW_COMPAT_2_1 static void spapr_machine_2_1_instance_options(MachineState *machine) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c new file mode 100644 index 0000000000..bcb483dbe6 --- /dev/null +++ b/hw/ppc/spapr_cpu_core.c @@ -0,0 +1,432 @@ +/* + * sPAPR CPU core device, acts as container of CPU thread devices. + * + * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "hw/cpu/core.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "target-ppc/cpu.h" +#include "hw/ppc/spapr.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "sysemu/cpus.h" +#include "target-ppc/kvm_ppc.h" +#include "hw/ppc/ppc.h" +#include "target-ppc/mmu-hash64.h" +#include "sysemu/numa.h" + +static void spapr_cpu_reset(void *opaque) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + cpu_reset(cs); + + /* All CPUs start halted. CPU0 is unhalted from the machine level + * reset code and the rest are explicitly started up by the guest + * using an RTAS call */ + cs->halted = 1; + + env->spr[SPR_HIOR] = 0; + + ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, + &error_fatal); +} + +static void spapr_cpu_destroy(PowerPCCPU *cpu) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + + xics_cpu_destroy(spapr->xics, cpu); + qemu_unregister_reset(spapr_cpu_reset, cpu); +} + +void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + int i; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + + /* Enable PAPR mode in TCG or KVM */ + cpu_ppc_set_papr(cpu); + + if (cpu->max_compat) { + Error *local_err = NULL; + + ppc_set_compat(cpu, cpu->max_compat, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + /* Set NUMA node for the added CPUs */ + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { + cs->numa_node = i; + break; + } + } + + xics_cpu_setup(spapr->xics, cpu); + + qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_reset(cpu); +} + +/* + * Return the sPAPR CPU core type for @model which essentially is the CPU + * model specified with -cpu cmdline option. + */ +char *spapr_get_cpu_core_type(const char *model) +{ + char *core_type; + gchar **model_pieces = g_strsplit(model, ",", 2); + + core_type = g_strdup_printf("%s-%s", model_pieces[0], TYPE_SPAPR_CPU_CORE); + g_strfreev(model_pieces); + + /* Check whether it exists or whether we have to look up an alias name */ + if (!object_class_by_name(core_type)) { + const char *realmodel; + + g_free(core_type); + realmodel = ppc_cpu_lookup_alias(model); + if (realmodel) { + return spapr_get_cpu_core_type(realmodel); + } + return NULL; + } + + return core_type; +} + +static void spapr_core_release(DeviceState *dev, void *opaque) +{ + sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + const char *typename = object_class_get_name(sc->cpu_class); + size_t size = object_type_get_instance_size(typename); + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUCore *cc = CPU_CORE(dev); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + void *obj = sc->threads + i * size; + DeviceState *dev = DEVICE(obj); + CPUState *cs = CPU(dev); + PowerPCCPU *cpu = POWERPC_CPU(cs); + + spapr_cpu_destroy(cpu); + cpu_remove_sync(cs); + object_unparent(obj); + } + + spapr->cores[cc->core_id / smp_threads] = NULL; + + g_free(sc->threads); + object_unparent(OBJECT(dev)); +} + +void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + CPUCore *cc = CPU_CORE(dev); + int smt = kvmppc_smt_threads(); + int index = cc->core_id / smp_threads; + sPAPRDRConnector *drc = + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); + sPAPRDRConnectorClass *drck; + Error *local_err = NULL; + + if (index == 0) { + error_setg(errp, "Boot CPU core may not be unplugged"); + return; + } + + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + spapr_hotplug_req_remove_by_index(drc); +} + +void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(dev); + CPUState *cs = CPU(core->threads); + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + Error *local_err = NULL; + void *fdt = NULL; + int fdt_offset = 0; + int index = cc->core_id / smp_threads; + int smt = kvmppc_smt_threads(); + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); + spapr->cores[index] = OBJECT(dev); + + g_assert(drc); + + /* + * Setup CPU DT entries only for hotplugged CPUs. For boot time or + * coldplugged CPUs DT entries are setup in spapr_finalize_fdt(). + */ + if (dev->hotplugged) { + fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err); + if (local_err) { + g_free(fdt); + spapr->cores[index] = NULL; + error_propagate(errp, local_err); + return; + } + + if (dev->hotplugged) { + /* + * Send hotplug notification interrupt to the guest only in case + * of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + /* + * Set the right DRC states for cold plugged CPU. + */ + drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); + drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); + } +} + +void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + MachineState *machine = MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + int spapr_max_cores = max_cpus / smp_threads; + int index; + Error *local_err = NULL; + CPUCore *cc = CPU_CORE(dev); + char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); + const char *type = object_get_typename(OBJECT(dev)); + + if (!mc->query_hotpluggable_cpus) { + error_setg(&local_err, "CPU hotplug not supported for this machine"); + goto out; + } + + if (strcmp(base_core_type, type)) { + error_setg(&local_err, "CPU core type should be %s", base_core_type); + goto out; + } + + if (cc->nr_threads != smp_threads) { + error_setg(&local_err, "threads must be %d", smp_threads); + goto out; + } + + if (cc->core_id % smp_threads) { + error_setg(&local_err, "invalid core id %d", cc->core_id); + goto out; + } + + index = cc->core_id / smp_threads; + if (index < 0 || index >= spapr_max_cores) { + error_setg(&local_err, "core id %d out of range", cc->core_id); + goto out; + } + + if (spapr->cores[index]) { + error_setg(&local_err, "core %d already populated", cc->core_id); + goto out; + } + +out: + g_free(base_core_type); + error_propagate(errp, local_err); +} + +static void spapr_cpu_core_realize_child(Object *child, Error **errp) +{ + Error *local_err = NULL; + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(child); + PowerPCCPU *cpu = POWERPC_CPU(cs); + + object_property_set_bool(child, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + spapr_cpu_init(spapr, cpu, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) +{ + sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + const char *typename = object_class_get_name(sc->cpu_class); + size_t size = object_type_get_instance_size(typename); + Error *local_err = NULL; + void *obj; + int i, j; + + sc->threads = g_malloc0(size * cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + char id[32]; + CPUState *cs; + + obj = sc->threads + i * size; + + object_initialize(obj, size, typename); + cs = CPU(obj); + cs->cpu_index = cc->core_id + i; + snprintf(id, sizeof(id), "thread[%d]", i); + object_property_add_child(OBJECT(sc), id, obj, &local_err); + if (local_err) { + goto err; + } + object_unref(obj); + } + + for (j = 0; j < cc->nr_threads; j++) { + obj = sc->threads + j * size; + + spapr_cpu_core_realize_child(obj, &local_err); + if (local_err) { + goto err; + } + } + return; + +err: + while (--i >= 0) { + obj = sc->threads + i * size; + object_unparent(obj); + } + g_free(sc->threads); + error_propagate(errp, local_err); +} + +static void spapr_cpu_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + dc->realize = spapr_cpu_core_realize; +} + +/* + * instance_init routines from different flavours of sPAPR CPU cores. + */ +#define SPAPR_CPU_CORE_INITFN(_type, _fname) \ +static void glue(glue(spapr_cpu_core_, _fname), _initfn(Object *obj)) \ +{ \ + sPAPRCPUCore *core = SPAPR_CPU_CORE(obj); \ + char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, stringify(_type)); \ + ObjectClass *oc = object_class_by_name(name); \ + g_assert(oc); \ + g_free((void *)name); \ + core->cpu_class = oc; \ +} + +SPAPR_CPU_CORE_INITFN(970mp_v1.0, 970MP_v10); +SPAPR_CPU_CORE_INITFN(970mp_v1.1, 970MP_v11); +SPAPR_CPU_CORE_INITFN(970_v2.2, 970); +SPAPR_CPU_CORE_INITFN(POWER5+_v2.1, POWER5plus); +SPAPR_CPU_CORE_INITFN(POWER7_v2.3, POWER7); +SPAPR_CPU_CORE_INITFN(POWER7+_v2.1, POWER7plus); +SPAPR_CPU_CORE_INITFN(POWER8_v2.0, POWER8); +SPAPR_CPU_CORE_INITFN(POWER8E_v2.1, POWER8E); +SPAPR_CPU_CORE_INITFN(POWER8NVL_v1.0, POWER8NVL); + +typedef struct SPAPRCoreInfo { + const char *name; + void (*initfn)(Object *obj); +} SPAPRCoreInfo; + +static const SPAPRCoreInfo spapr_cores[] = { + /* 970 */ + { .name = "970_v2.2", .initfn = spapr_cpu_core_970_initfn }, + + /* 970MP variants */ + { .name = "970MP_v1.0", .initfn = spapr_cpu_core_970MP_v10_initfn }, + { .name = "970mp_v1.0", .initfn = spapr_cpu_core_970MP_v10_initfn }, + { .name = "970MP_v1.1", .initfn = spapr_cpu_core_970MP_v11_initfn }, + { .name = "970mp_v1.1", .initfn = spapr_cpu_core_970MP_v11_initfn }, + + /* POWER5+ */ + { .name = "POWER5+_v2.1", .initfn = spapr_cpu_core_POWER5plus_initfn }, + + /* POWER7 */ + { .name = "POWER7_v2.3", .initfn = spapr_cpu_core_POWER7_initfn }, + + /* POWER7+ */ + { .name = "POWER7+_v2.1", .initfn = spapr_cpu_core_POWER7plus_initfn }, + + /* POWER8 */ + { .name = "POWER8_v2.0", .initfn = spapr_cpu_core_POWER8_initfn }, + + /* POWER8E */ + { .name = "POWER8E_v2.1", .initfn = spapr_cpu_core_POWER8E_initfn }, + + /* POWER8NVL */ + { .name = "POWER8NVL_v1.0", .initfn = spapr_cpu_core_POWER8NVL_initfn }, + + { .name = NULL } +}; + +static void spapr_cpu_core_register(const SPAPRCoreInfo *info) +{ + TypeInfo type_info = { + .parent = TYPE_SPAPR_CPU_CORE, + .instance_size = sizeof(sPAPRCPUCore), + .instance_init = info->initfn, + }; + + type_info.name = g_strdup_printf("%s-" TYPE_SPAPR_CPU_CORE, info->name); + type_register(&type_info); + g_free((void *)type_info.name); +} + +static const TypeInfo spapr_cpu_core_type_info = { + .name = TYPE_SPAPR_CPU_CORE, + .parent = TYPE_CPU_CORE, + .abstract = true, + .instance_size = sizeof(sPAPRCPUCore), + .class_init = spapr_cpu_core_class_init, +}; + +static void spapr_cpu_core_register_types(void) +{ + const SPAPRCoreInfo *info = spapr_cores; + + type_register_static(&spapr_cpu_core_type_info); + while (info->name) { + spapr_cpu_core_register(info); + info++; + } +} + +type_init(spapr_cpu_core_register_types) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 1f5f1d790a..26a067951c 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -140,6 +140,8 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, DPRINTFN("finalizing device removal"); drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, drc->detach_cb_opaque, NULL); + } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { + drc->awaiting_allocation = false; } } return RTAS_OUT_SUCCESS; @@ -269,11 +271,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, void *fdt; if (!drc->fdt) { - visit_start_struct(v, name, NULL, 0, &err); - if (!err) { - visit_end_struct(v, &err); - } - error_propagate(errp, err); + visit_type_null(v, NULL, errp); return; } @@ -301,7 +299,8 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, case FDT_END_NODE: /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ g_assert(fdt_depth > 0); - visit_end_struct(v, &err); + visit_check_struct(v, &err); + visit_end_struct(v, NULL); if (err) { error_propagate(errp, err); return; @@ -312,7 +311,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, int i; prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); - visit_start_list(v, name, &err); + visit_start_list(v, name, NULL, 0, &err); if (err) { error_propagate(errp, err); return; @@ -324,7 +323,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, return; } } - visit_end_list(v); + visit_end_list(v, NULL); break; } default: @@ -376,6 +375,10 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, drc->signalled = (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) ? true : coldplug; + if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) { + drc->awaiting_allocation = true; + } + object_property_add_link(OBJECT(drc), "device", object_get_typename(OBJECT(drc->dev)), (Object **)(&drc->dev), @@ -424,6 +427,12 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, return; } + if (drc->awaiting_allocation) { + drc->awaiting_release = true; + DPRINTFN("awaiting allocation to complete before removal"); + return; + } + drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; if (drc->detach_cb) { diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 049fb1b325..b0668b34a9 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -386,7 +386,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); } static void spapr_hotplug_set_signalled(uint32_t drc_index) @@ -449,6 +449,9 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, case SPAPR_DR_CONNECTOR_TYPE_LMB: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; + break; default: /* we shouldn't be signaling hotplug events for resources * that don't support them @@ -465,7 +468,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); } void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) @@ -548,7 +551,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * interrupts. */ if (rtas_event_log_contains(mask, true)) { - qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); } return; @@ -600,7 +603,7 @@ out_no_events: void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); - spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false, + spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, &error_fatal); spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 8f40602a5e..73af112e1d 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1,12 +1,15 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "sysemu/sysemu.h" +#include "qemu/log.h" #include "cpu.h" +#include "exec/exec-all.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" #include "mmu-hash64.h" #include "cpu-models.h" #include "trace.h" +#include "sysemu/kvm.h" #include "kvm_ppc.h" struct SPRSyncState { @@ -80,12 +83,12 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong pte_index = args[1]; target_ulong pteh = args[2]; target_ulong ptel = args[3]; - unsigned apshift, spshift; + unsigned apshift; target_ulong raddr; target_ulong index; uint64_t token; - apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel, &spshift); + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); if (!apshift) { /* Bad page size encoding */ return H_PARAMETER; @@ -99,11 +102,15 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_PARAMETER; } } else { + target_ulong wimg_flags; /* Looks like an IO address */ /* FIXME: What WIMG combinations could be sensible for IO? * For now we allow WIMG=010x, but are there others? */ /* FIXME: Should we check against registered IO addresses? */ - if ((ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)) != HPTE64_R_I) { + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { return H_PARAMETER; } } @@ -183,6 +190,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex, static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; target_ulong pte_index = args[1]; target_ulong avpn = args[2]; @@ -193,6 +201,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, switch (ret) { case REMOVE_SUCCESS: + check_tlb_flush(env); return H_SUCCESS; case REMOVE_NOT_FOUND: @@ -229,7 +238,9 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env = &cpu->env; int i; + target_ulong rc = H_SUCCESS; for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { target_ulong *tsh = &args[i*2]; @@ -262,14 +273,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, break; case REMOVE_PARM: - return H_PARAMETER; + rc = H_PARAMETER; + goto exit; case REMOVE_HW: - return H_HARDWARE; + rc = H_HARDWARE; + goto exit; } } + exit: + check_tlb_flush(env); - return H_SUCCESS; + return rc; } static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, @@ -911,6 +926,41 @@ static void do_set_compat(void *arg) ((cpuver) == CPU_POWERPC_LOGICAL_2_06_PLUS) ? 2061 : \ ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0) +static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr, + unsigned max_lvl, unsigned *compat_lvl, + unsigned *cpu_version) +{ + unsigned lvl = get_compat_level(pvr); + bool is205, is206, is207; + + if (!lvl) { + return; + } + + /* If it is a logical PVR, try to determine the highest level */ + is205 = (pcc->pcr_supported & PCR_COMPAT_2_05) && + (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05)); + is206 = (pcc->pcr_supported & PCR_COMPAT_2_06) && + ((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) || + (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS))); + is207 = (pcc->pcr_supported & PCR_COMPAT_2_07) && + (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_07)); + + if (is205 || is206 || is207) { + if (!max_lvl) { + /* User did not set the level, choose the highest */ + if (*compat_lvl <= lvl) { + *compat_lvl = lvl; + *cpu_version = pvr; + } + } else if (max_lvl >= lvl) { + /* User chose the level, don't set higher than this */ + *compat_lvl = lvl; + *cpu_version = pvr; + } + } +} + #define OV5_DRCONF_MEMORY 0x20 static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, @@ -920,7 +970,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, { target_ulong list = ppc64_phys_to_real(args[0]); target_ulong ov_table, ov5; - PowerPCCPUClass *pcc_ = POWERPC_CPU_GET_CLASS(cpu_); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_); CPUState *cs; bool cpu_match = false, cpu_update = true, memory_update = false; unsigned old_cpu_version = cpu_->cpu_version; @@ -947,29 +997,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, cpu_match = true; cpu_version = cpu_->cpu_version; } else if (!cpu_match) { - /* If it is a logical PVR, try to determine the highest level */ - unsigned lvl = get_compat_level(pvr); - if (lvl) { - bool is205 = (pcc_->pcr_mask & PCR_COMPAT_2_05) && - (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_05)); - bool is206 = (pcc_->pcr_mask & PCR_COMPAT_2_06) && - ((lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06)) || - (lvl == get_compat_level(CPU_POWERPC_LOGICAL_2_06_PLUS))); - - if (is205 || is206) { - if (!max_lvl) { - /* User did not set the level, choose the highest */ - if (compat_lvl <= lvl) { - compat_lvl = lvl; - cpu_version = pvr; - } - } else if (max_lvl >= lvl) { - /* User chose the level, don't set higher than this */ - compat_lvl = lvl; - cpu_version = pvr; - } - } - } + cas_handle_compat_cpu(pcc, pvr, max_lvl, &compat_lvl, &cpu_version); } /* Terminator record */ if (~pvr_mask & pvr) { @@ -979,7 +1007,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, /* Parsing finished */ trace_spapr_cas_pvr(cpu_->cpu_version, cpu_match, - cpu_version, pcc_->pcr_mask); + cpu_version, pcc->pcr_mask); /* Update CPUs */ if (old_cpu_version != cpu_version) { diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 7dd458846e..6bc4d4db33 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -17,7 +17,9 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "hw/hw.h" +#include "qemu/log.h" #include "sysemu/kvm.h" #include "hw/qdev.h" #include "kvm_ppc.h" @@ -75,6 +77,37 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) } } +static uint64_t *spapr_tce_alloc_table(uint32_t liobn, + uint32_t page_shift, + uint32_t nb_table, + int *fd, + bool need_vfio) +{ + uint64_t *table = NULL; + uint64_t window_size = (uint64_t)nb_table << page_shift; + + if (kvm_enabled() && !(window_size >> 32)) { + table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio); + } + + if (!table) { + *fd = -1; + table = g_malloc0(nb_table * sizeof(uint64_t)); + } + + trace_spapr_iommu_new_table(liobn, table, *fd); + + return table; +} + +static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) +{ + if (!kvm_enabled() || + (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) { + g_free(table); + } +} + /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) @@ -105,61 +138,131 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, return ret; } +static void spapr_tce_table_pre_save(void *opaque) +{ + sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque); + + tcet->mig_table = tcet->table; + tcet->mig_nb_table = tcet->nb_table; + + trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table, + tcet->bus_offset, tcet->page_shift); +} + +static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu) +{ + sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); + + return 1ULL << tcet->page_shift; +} + +static void spapr_tce_notify_started(MemoryRegion *iommu) +{ + spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true); +} + +static void spapr_tce_notify_stopped(MemoryRegion *iommu) +{ + spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false); +} + static int spapr_tce_table_post_load(void *opaque, int version_id) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque); + uint32_t old_nb_table = tcet->nb_table; + uint64_t old_bus_offset = tcet->bus_offset; + uint32_t old_page_shift = tcet->page_shift; if (tcet->vdev) { spapr_vio_set_bypass(tcet->vdev, tcet->bypass); } + if (tcet->mig_nb_table != tcet->nb_table) { + spapr_tce_table_disable(tcet); + } + + if (tcet->mig_nb_table) { + if (!tcet->nb_table) { + spapr_tce_table_enable(tcet, old_page_shift, old_bus_offset, + tcet->mig_nb_table); + } + + memcpy(tcet->table, tcet->mig_table, + tcet->nb_table * sizeof(tcet->table[0])); + + free(tcet->mig_table); + tcet->mig_table = NULL; + } + + trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table, + tcet->bus_offset, tcet->page_shift); + return 0; } +static bool spapr_tce_table_ex_needed(void *opaque) +{ + sPAPRTCETable *tcet = opaque; + + return tcet->bus_offset || tcet->page_shift != 0xC; +} + +static const VMStateDescription vmstate_spapr_tce_table_ex = { + .name = "spapr_iommu_ex", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_tce_table_ex_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(bus_offset, sPAPRTCETable), + VMSTATE_UINT32(page_shift, sPAPRTCETable), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr_tce_table = { .name = "spapr_iommu", .version_id = 2, .minimum_version_id = 2, + .pre_save = spapr_tce_table_pre_save, .post_load = spapr_tce_table_post_load, .fields = (VMStateField []) { /* Sanity check */ VMSTATE_UINT32_EQUAL(liobn, sPAPRTCETable), - VMSTATE_UINT32_EQUAL(nb_table, sPAPRTCETable), /* IOMMU state */ + VMSTATE_UINT32(mig_nb_table, sPAPRTCETable), VMSTATE_BOOL(bypass, sPAPRTCETable), - VMSTATE_VARRAY_UINT32(table, sPAPRTCETable, nb_table, 0, vmstate_info_uint64, uint64_t), + VMSTATE_VARRAY_UINT32_ALLOC(mig_table, sPAPRTCETable, mig_nb_table, 0, + vmstate_info_uint64, uint64_t), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription*[]) { + &vmstate_spapr_tce_table_ex, + NULL + } }; static MemoryRegionIOMMUOps spapr_iommu_ops = { .translate = spapr_tce_translate_iommu, + .get_min_page_size = spapr_tce_get_min_page_size, + .notify_started = spapr_tce_notify_started, + .notify_stopped = spapr_tce_notify_stopped, }; static int spapr_tce_table_realize(DeviceState *dev) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); - uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift; - - if (kvm_enabled() && !(window_size >> 32)) { - tcet->table = kvmppc_create_spapr_tce(tcet->liobn, - window_size, - &tcet->fd, - tcet->need_vfio); - } - - if (!tcet->table) { - size_t table_size = tcet->nb_table * sizeof(uint64_t); - tcet->table = g_malloc0(table_size); - } + Object *tcetobj = OBJECT(tcet); + char tmp[32]; - trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd); + tcet->fd = -1; + tcet->need_vfio = false; + snprintf(tmp, sizeof(tmp), "tce-root-%x", tcet->liobn); + memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX); - memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops, - "iommu-spapr", - (uint64_t)tcet->nb_table << tcet->page_shift); + snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn); + memory_region_init_iommu(&tcet->iommu, tcetobj, &spapr_iommu_ops, tmp, 0); QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list); @@ -201,14 +304,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio) tcet->table = newtable; } -sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, - uint64_t bus_offset, - uint32_t page_shift, - uint32_t nb_table, - bool need_vfio) +sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn) { sPAPRTCETable *tcet; - char tmp[64]; + char tmp[32]; if (spapr_tce_find_by_liobn(liobn)) { fprintf(stderr, "Attempted to create TCE table with duplicate" @@ -216,16 +315,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, return NULL; } - if (!nb_table) { - return NULL; - } - tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); tcet->liobn = liobn; - tcet->bus_offset = bus_offset; - tcet->page_shift = page_shift; - tcet->nb_table = nb_table; - tcet->need_vfio = need_vfio; snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn); object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL); @@ -235,22 +326,58 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn, return tcet; } +void spapr_tce_table_enable(sPAPRTCETable *tcet, + uint32_t page_shift, uint64_t bus_offset, + uint32_t nb_table) +{ + if (tcet->nb_table) { + error_report("Warning: trying to enable already enabled TCE table"); + return; + } + + tcet->bus_offset = bus_offset; + tcet->page_shift = page_shift; + tcet->nb_table = nb_table; + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->nb_table, + &tcet->fd, + tcet->need_vfio); + + memory_region_set_size(&tcet->iommu, + (uint64_t)tcet->nb_table << tcet->page_shift); + memory_region_add_subregion(&tcet->root, tcet->bus_offset, &tcet->iommu); +} + +void spapr_tce_table_disable(sPAPRTCETable *tcet) +{ + if (!tcet->nb_table) { + return; + } + + memory_region_del_subregion(&tcet->root, &tcet->iommu); + memory_region_set_size(&tcet->iommu, 0); + + spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table); + tcet->fd = -1; + tcet->table = NULL; + tcet->bus_offset = 0; + tcet->page_shift = 0; + tcet->nb_table = 0; +} + static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); QLIST_REMOVE(tcet, list); - if (!kvm_enabled() || - (kvmppc_remove_spapr_tce(tcet->table, tcet->fd, - tcet->nb_table) != 0)) { - g_free(tcet->table); - } + spapr_tce_table_disable(tcet); } MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet) { - return &tcet->iommu; + return &tcet->root; } static void spapr_tce_reset(DeviceState *dev) @@ -258,7 +385,9 @@ static void spapr_tce_reset(DeviceState *dev) sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); size_t table_size = tcet->nb_table * sizeof(uint64_t); - memset(tcet->table, 0, table_size); + if (tcet->nb_table) { + memset(tcet->table, 0, table_size); + } } static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, @@ -277,7 +406,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba, tcet->table[index] = tce; entry.target_as = &address_space_memory, - entry.iova = ioba & page_mask; + entry.iova = (ioba - tcet->bus_offset) & page_mask; entry.translated_addr = tce & page_mask; entry.addr_mask = ~page_mask; entry.perm = spapr_tce_iommu_access_flags(tce); diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 573e635bfb..949c44fec8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -35,6 +35,7 @@ #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "exec/address-spaces.h" +#include "exec/ram_addr.h" #include <libfdt.h> #include "trace.h" #include "qemu/error-report.h" @@ -44,6 +45,8 @@ #include "hw/pci/pci_bus.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" +#include "sysemu/kvm.h" +#include "sysemu/hostmem.h" #include "hw/vfio/vfio.h" @@ -321,7 +324,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - xics_free(spapr->icp, msi->first_irq, msi->num); + xics_spapr_free(spapr->xics, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); } @@ -359,7 +362,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = xics_alloc_block(spapr->icp, 0, req_num, false, + irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -370,7 +373,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { - xics_free(spapr->icp, msi->first_irq, msi->num); + xics_spapr_free(spapr->xics, msi->first_irq, msi->num); g_hash_table_remove(phb->msi, &config_addr); } @@ -732,7 +735,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr, trace_spapr_pci_msi_write(addr, data, irq); - qemu_irq_pulse(xics_get_qirq(spapr->icp, irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, irq)); } static const MemoryRegionOps spapr_msi_ops = { @@ -1086,19 +1089,11 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, void *fdt = NULL; int fdt_start_offset = 0, fdt_size; - if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { - sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn); - - spapr_tce_set_need_vfio(tcet, true); - } - - if (dev->hotplugged) { - fdt = create_device_tree(&fdt_size); - fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); - if (!fdt_start_offset) { - error_setg(errp, "Failed to create pci child device tree node"); - goto out; - } + fdt = create_device_tree(&fdt_size); + fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (!fdt_start_offset) { + error_setg(errp, "Failed to create pci child device tree node"); + goto out; } drck->attach(drc, DEVICE(pdev), @@ -1311,12 +1306,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) PCIBus *bus; uint64_t msi_window_size = 4096; sPAPRTCETable *tcet; - uint32_t nb_table; + const unsigned windows_supported = + sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; if (sphb->index != (uint32_t)-1) { hwaddr windows_base; - if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1) + if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1) + || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) || (sphb->mem_win_addr != (hwaddr)-1) || (sphb->io_win_addr != (hwaddr)-1)) { error_setg(errp, "Either \"index\" or other parameters must" @@ -1331,7 +1328,9 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index; - sphb->dma_liobn = SPAPR_PCI_LIOBN(sphb->index, 0); + for (i = 0; i < windows_supported; ++i) { + sphb->dma_liobn[i] = SPAPR_PCI_LIOBN(sphb->index, i); + } windows_base = SPAPR_PCI_WINDOW_BASE + sphb->index * SPAPR_PCI_WINDOW_SPACING; @@ -1344,8 +1343,9 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } - if (sphb->dma_liobn == (uint32_t)-1) { - error_setg(errp, "LIOBN not specified for PHB"); + if ((sphb->dma_liobn[0] == (uint32_t)-1) || + ((sphb->dma_liobn[1] == (uint32_t)-1) && (windows_supported > 1))) { + error_setg(errp, "LIOBN(s) not specified for PHB"); return; } @@ -1444,7 +1444,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = xics_alloc_block(spapr->icp, 0, 1, true, false, &local_err); + irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false, + &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); @@ -1463,19 +1464,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } } - nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT; - tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, - 0, SPAPR_TCE_PAGE_SHIFT, nb_table, false); - if (!tcet) { - error_setg(errp, "Unable to create TCE table for %s", - sphb->dtbusname); - return; + /* DMA setup */ + for (i = 0; i < windows_supported; ++i) { + tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]); + if (!tcet) { + error_setg(errp, "Creating window#%d failed for %s", + i, sphb->dtbusname); + return; + } + memory_region_add_subregion_overlap(&sphb->iommu_root, 0, + spapr_tce_get_iommu(tcet), 0); } - /* Register default 32bit DMA window */ - memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr, - spapr_tce_get_iommu(tcet)); - sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); } @@ -1490,8 +1490,31 @@ static int spapr_phb_children_reset(Object *child, void *opaque) return 0; } +void spapr_phb_dma_reset(sPAPRPHBState *sphb) +{ + int i; + sPAPRTCETable *tcet; + + for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + + if (tcet && tcet->nb_table) { + spapr_tce_table_disable(tcet); + } + } + + /* Register default 32bit DMA window */ + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]); + spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr, + sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT); +} + static void spapr_phb_reset(DeviceState *qdev) { + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev); + + spapr_phb_dma_reset(sphb); + /* Reset the IOMMU state */ object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); @@ -1503,7 +1526,8 @@ static void spapr_phb_reset(DeviceState *qdev) static Property spapr_phb_properties[] = { DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1), DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1), - DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1), + DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn[0], -1), + DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1), DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, SPAPR_PCI_MMIO_WIN_SIZE), @@ -1515,6 +1539,11 @@ static Property spapr_phb_properties[] = { /* Default DMA window is 0..1GB */ DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0), DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000), + DEFINE_PROP_UINT64("dma64_win_addr", sPAPRPHBState, dma64_win_addr, + 0x800000000000000ULL), + DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true), + DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask, + (1ULL << 12) | (1ULL << 16)), DEFINE_PROP_END_OF_LIST(), }; @@ -1591,7 +1620,7 @@ static const VMStateDescription vmstate_spapr_pci = { .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), - VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState), + VMSTATE_UINT32_EQUAL(dma_liobn[0], sPAPRPHBState), VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState), VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState), VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState), @@ -1625,7 +1654,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->reset = spapr_phb_reset; dc->vmsd = &vmstate_spapr_pci; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->cannot_instantiate_with_device_add_yet = false; hp->plug = spapr_phb_hot_plug_child; hp->unplug = spapr_phb_hot_unplug_child; } @@ -1768,6 +1796,15 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t interrupt_map_mask[] = { cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; + uint32_t ddw_applicable[] = { + cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW), + cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW), + cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW) + }; + uint32_t ddw_extensions[] = { + cpu_to_be32(1), + cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW) + }; sPAPRTCETable *tcet; PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; sPAPRFDT s_fdt; @@ -1790,7 +1827,15 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); - _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS)); + _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS_SPAPR)); + + /* Dynamic DMA window */ + if (phb->ddw_enabled) { + _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable, + sizeof(ddw_applicable))); + _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions", + &ddw_extensions, sizeof(ddw_extensions))); + } /* Build the interrupt-map, this must matches what is done * in pci_spapr_map_irq @@ -1815,7 +1860,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map, sizeof(interrupt_map))); - tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0)); + tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]); if (!tcet) { return -1; } diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index cbd3d23c91..8448e0b024 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -18,15 +18,16 @@ */ #include "qemu/osdep.h" +#include <linux/vfio.h> #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" -#include "linux/vfio.h" #include "hw/vfio/vfio.h" #include "qemu/error-report.h" +#include "sysemu/qtest.h" #define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge" @@ -48,7 +49,9 @@ static Property spapr_phb_vfio_properties[] = { static void spapr_phb_vfio_instance_init(Object *obj) { - error_report("spapr-pci-vfio-host-bridge is deprecated"); + if (!qtest_enabled()) { + error_report("spapr-pci-vfio-host-bridge is deprecated"); + } } bool spapr_phb_eeh_available(sPAPRPHBState *sphb) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index f07325831c..dc058e512b 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -26,14 +26,17 @@ */ #include "qemu/osdep.h" #include "cpu.h" +#include "qemu/log.h" #include "sysemu/sysemu.h" #include "sysemu/char.h" #include "hw/qdev.h" #include "sysemu/device_tree.h" #include "sysemu/cpus.h" +#include "sysemu/kvm.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" +#include "hw/ppc/ppc.h" #include "qapi-event.h" #include "hw/boards.h" @@ -162,6 +165,27 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } +/* + * Set the timebase offset of the CPU to that of first CPU. + * This helps hotplugged CPU to have the correct timebase offset. + */ +static void spapr_cpu_update_tb_offset(PowerPCCPU *cpu) +{ + PowerPCCPU *fcpu = POWERPC_CPU(first_cpu); + + cpu->env.tb_env->tb_offset = fcpu->env.tb_env->tb_offset; +} + +static void spapr_cpu_set_endianness(PowerPCCPU *cpu) +{ + PowerPCCPU *fcpu = POWERPC_CPU(first_cpu); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(fcpu); + + if (!pcc->interrupts_big_endian(fcpu)) { + cpu->env.spr[SPR_LPCR] |= LPCR_ILE; + } +} + static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -198,6 +222,8 @@ static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr, env->nip = start; env->gpr[3] = r3; cs->halted = 0; + spapr_cpu_set_endianness(cpu); + spapr_cpu_update_tb_offset(cpu); qemu_cpu_kick(cs); diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c new file mode 100644 index 0000000000..177dcffc9b --- /dev/null +++ b/hw/ppc/spapr_rtas_ddw.c @@ -0,0 +1,295 @@ +/* + * QEMU sPAPR Dynamic DMA windows support + * + * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu/error-report.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "trace.h" + +static int spapr_phb_get_active_win_num_cb(Object *child, void *opaque) +{ + sPAPRTCETable *tcet; + + tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && tcet->nb_table) { + ++*(unsigned *)opaque; + } + return 0; +} + +static unsigned spapr_phb_get_active_win_num(sPAPRPHBState *sphb) +{ + unsigned ret = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_active_win_num_cb, &ret); + + return ret; +} + +static int spapr_phb_get_free_liobn_cb(Object *child, void *opaque) +{ + sPAPRTCETable *tcet; + + tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && !tcet->nb_table) { + *(uint32_t *)opaque = tcet->liobn; + return 1; + } + return 0; +} + +static unsigned spapr_phb_get_free_liobn(sPAPRPHBState *sphb) +{ + uint32_t liobn = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_free_liobn_cb, &liobn); + + return liobn; +} + +static uint32_t spapr_page_mask_to_query_mask(uint64_t page_mask) +{ + int i; + uint32_t mask = 0; + const struct { int shift; uint32_t mask; } masks[] = { + { 12, RTAS_DDW_PGSIZE_4K }, + { 16, RTAS_DDW_PGSIZE_64K }, + { 24, RTAS_DDW_PGSIZE_16M }, + { 25, RTAS_DDW_PGSIZE_32M }, + { 26, RTAS_DDW_PGSIZE_64M }, + { 27, RTAS_DDW_PGSIZE_128M }, + { 28, RTAS_DDW_PGSIZE_256M }, + { 34, RTAS_DDW_PGSIZE_16G }, + }; + + for (i = 0; i < ARRAY_SIZE(masks); ++i) { + if (page_mask & (1ULL << masks[i].shift)) { + mask |= masks[i].mask; + } + } + + return mask; +} + +static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRPHBState *sphb; + uint64_t buid, max_window_size; + uint32_t avail, addr, pgmask = 0; + MachineState *machine = MACHINE(spapr); + + if ((nargs != 3) || (nret != 5)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + /* Translate page mask to LoPAPR format */ + pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask); + + /* + * This is "Largest contiguous block of TCEs allocated specifically + * for (that is, are reserved for) this PE". + * Return the maximum number as maximum supported RAM size was in 4K pages. + */ + if (machine->ram_size == machine->maxram_size) { + max_window_size = machine->ram_size; + } else { + MemoryHotplugState *hpms = &spapr->hotplug_memory; + + max_window_size = hpms->base + memory_region_size(&hpms->mr); + } + + avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, avail); + rtas_st(rets, 2, max_window_size >> SPAPR_TCE_PAGE_SHIFT); + rtas_st(rets, 3, pgmask); + rtas_st(rets, 4, 0); /* DMA migration mask, not supported */ + + trace_spapr_iommu_ddw_query(buid, addr, avail, max_window_size, pgmask); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_create_pe_dma_window(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRPHBState *sphb; + sPAPRTCETable *tcet = NULL; + uint32_t addr, page_shift, window_shift, liobn; + uint64_t buid, win_addr; + int windows; + + if ((nargs != 5) || (nret != 4)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + page_shift = rtas_ld(args, 3); + window_shift = rtas_ld(args, 4); + liobn = spapr_phb_get_free_liobn(sphb); + windows = spapr_phb_get_active_win_num(sphb); + + if (!(sphb->page_size_mask & (1ULL << page_shift)) || + (window_shift < page_shift)) { + goto param_error_exit; + } + + if (!liobn || !sphb->ddw_enabled || windows == SPAPR_PCI_DMA_MAX_WINDOWS) { + goto hw_error_exit; + } + + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto hw_error_exit; + } + + win_addr = (windows == 0) ? sphb->dma_win_addr : sphb->dma64_win_addr; + spapr_tce_table_enable(tcet, page_shift, win_addr, + 1ULL << (window_shift - page_shift)); + if (!tcet->nb_table) { + goto hw_error_exit; + } + + trace_spapr_iommu_ddw_create(buid, addr, 1ULL << page_shift, + 1ULL << window_shift, tcet->bus_offset, liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, liobn); + rtas_st(rets, 2, tcet->bus_offset >> 32); + rtas_st(rets, 3, tcet->bus_offset & ((uint32_t) -1)); + + return; + +hw_error_exit: + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_remove_pe_dma_window(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRPHBState *sphb; + sPAPRTCETable *tcet; + uint32_t liobn; + + if ((nargs != 1) || (nret != 1)) { + goto param_error_exit; + } + + liobn = rtas_ld(args, 0); + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto param_error_exit; + } + + sphb = SPAPR_PCI_HOST_BRIDGE(OBJECT(tcet)->parent); + if (!sphb || !sphb->ddw_enabled || !tcet->nb_table) { + goto param_error_exit; + } + + spapr_tce_table_disable(tcet); + trace_spapr_iommu_ddw_remove(liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_reset_pe_dma_window(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRPHBState *sphb; + uint64_t buid; + uint32_t addr; + + if ((nargs != 3) || (nret != 1)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + spapr_phb_dma_reset(sphb); + trace_spapr_iommu_ddw_reset(buid, addr); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void spapr_rtas_ddw_init(void) +{ + spapr_rtas_register(RTAS_IBM_QUERY_PE_DMA_WINDOW, + "ibm,query-pe-dma-window", + rtas_ibm_query_pe_dma_window); + spapr_rtas_register(RTAS_IBM_CREATE_PE_DMA_WINDOW, + "ibm,create-pe-dma-window", + rtas_ibm_create_pe_dma_window); + spapr_rtas_register(RTAS_IBM_REMOVE_PE_DMA_WINDOW, + "ibm,remove-pe-dma-window", + rtas_ibm_remove_pe_dma_window); + spapr_rtas_register(RTAS_IBM_RESET_PE_DMA_WINDOW, + "ibm,reset-pe-dma-window", + rtas_ibm_reset_pe_dma_window); +} + +type_init(spapr_rtas_ddw_init) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8aa021fde9..f93244d7c1 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -22,6 +22,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/hw.h" +#include "qemu/log.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" @@ -56,12 +57,9 @@ static char *spapr_vio_get_dev_name(DeviceState *qdev) { VIOsPAPRDevice *dev = VIO_SPAPR_DEVICE(qdev); VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); - char *name; /* Device tree style name device@reg */ - name = g_strdup_printf("%s@%x", pc->dt_name, dev->reg); - - return name; + return g_strdup_printf("%s@%x", pc->dt_name, dev->reg); } static void spapr_vio_bus_class_init(ObjectClass *klass, void *data) @@ -465,7 +463,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false, &local_err); + dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -482,11 +480,9 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) memory_region_add_subregion_overlap(&dev->mrroot, 0, &dev->mrbypass, 1); address_space_init(&dev->as, &dev->mrroot, qdev->id); - dev->tcet = spapr_tce_new_table(qdev, liobn, - 0, - SPAPR_TCE_PAGE_SHIFT, - pc->rtce_window_size >> - SPAPR_TCE_PAGE_SHIFT, false); + dev->tcet = spapr_tce_new_table(qdev, liobn); + spapr_tce_table_enable(dev->tcet, SPAPR_TCE_PAGE_SHIFT, 0, + pc->rtce_window_size >> SPAPR_TCE_PAGE_SHIFT); dev->tcet->vdev = dev; memory_region_add_subregion_overlap(&dev->mrroot, 0, spapr_tce_get_iommu(dev->tcet), 2); @@ -584,7 +580,7 @@ const VMStateDescription vmstate_spapr_vio = { VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice), /* General VIO device state */ - VMSTATE_UINTTL(signal_state, VIOsPAPRDevice), + VMSTATE_UINT64(signal_state, VIOsPAPRDevice), VMSTATE_UINT64(crq.qladdr, VIOsPAPRDevice), VMSTATE_UINT32(crq.qsize, VIOsPAPRDevice), VMSTATE_UINT32(crq.qnext, VIOsPAPRDevice), diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events new file mode 100644 index 0000000000..dfeab93089 --- /dev/null +++ b/hw/ppc/trace-events @@ -0,0 +1,43 @@ +# See docs/tracing.txt for syntax documentation. + +# hw/ppc/spapr_pci.c +spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)" +spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64 +spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u" +spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" +spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u" +spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" +spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u" + +# hw/ppc/spapr.c +spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes" +spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes" + +# hw/ppc/spapr_hcall.c +spapr_cas_pvr_try(uint32_t pvr) "%x" +spapr_cas_pvr(uint32_t cur_pvr, bool cpu_match, uint32_t new_pvr, uint64_t pcr) "current=%x, cpu_match=%u, new=%x, compat flags=%"PRIx64 + +# hw/ppc/spapr_iommu.c +spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_pci_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x" +spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d" +spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32 +spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => %"PRIx32" bus_offset=%"PRIx64" ps=%"PRIu32 +spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win_size, uint32_t pgmask) "buid=%"PRIx64" addr=%"PRIx32", %u windows available, max window size=%"PRIx64", mask=%"PRIx32 +spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=%"PRIx64" addr=%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=%"PRIx64", liobn=%"PRIx32 +spapr_iommu_ddw_remove(uint32_t liobn) "liobn=%"PRIx32 +spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=%"PRIx64" addr=%"PRIx32 + +# hw/ppc/ppc.c +ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" + +# hw/ppc/prep.c +prep_io_800_writeb(uint32_t addr, uint32_t val) "0x%08" PRIx32 " => 0x%02" PRIx32 +prep_io_800_readb(uint32_t addr, uint32_t retval) "0x%08" PRIx32 " <= 0x%02" PRIx32 diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index b807a08c28..b97d96685c 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "cpu.h" #include "hw/sysbus.h" #include "hw/hw.h" #include "hw/char/serial.h" |