diff options
Diffstat (limited to 'hw/spapr.c')
-rw-r--r-- | hw/spapr.c | 278 |
1 files changed, 211 insertions, 67 deletions
diff --git a/hw/spapr.c b/hw/spapr.c index 5a98d8651f..cca20f9a51 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -50,19 +50,29 @@ #include <libfdt.h> -#define KERNEL_LOAD_ADDR 0x00000000 -#define INITRD_LOAD_ADDR 0x02800000 +/* SLOF memory layout: + * + * SLOF raw image loaded at 0, copies its romfs right below the flat + * device-tree, then position SLOF itself 31M below that + * + * So we set FW_OVERHEAD to 40MB which should account for all of that + * and more + * + * We load our kernel at 4M, leaving space for SLOF initial image + */ #define FDT_MAX_SIZE 0x10000 #define RTAS_MAX_SIZE 0x10000 #define FW_MAX_SIZE 0x400000 #define FW_FILE_NAME "slof.bin" +#define FW_OVERHEAD 0x2800000 +#define KERNEL_LOAD_ADDR FW_MAX_SIZE -#define MIN_RMA_SLOF 128UL +#define MIN_RMA_SLOF 128UL #define TIMEBASE_FREQ 512000000ULL #define MAX_CPUS 256 -#define XICS_IRQS 1024 +#define XICS_IRQS 1024 #define SPAPR_PCI_BUID 0x800000020000001ULL #define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000) @@ -73,7 +83,8 @@ sPAPREnvironment *spapr; -qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num) +qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num, + enum xics_irq_type type) { uint32_t irq; qemu_irq qirq; @@ -85,7 +96,7 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num) irq = spapr->next_irq++; } - qirq = xics_find_qirq(spapr->icp, irq); + qirq = xics_assign_irq(spapr->icp, irq, type); if (!qirq) { return NULL; } @@ -97,19 +108,56 @@ qemu_irq spapr_allocate_irq(uint32_t hint, uint32_t *irq_num) return qirq; } +static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr) +{ + int ret = 0, offset; + CPUPPCState *env; + char cpu_model[32]; + int smt = kvmppc_smt_threads(); + + assert(spapr->cpu_model); + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + uint32_t associativity[] = {cpu_to_be32(0x5), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(env->numa_node), + cpu_to_be32(env->cpu_index)}; + + if ((env->cpu_index % smt) != 0) { + continue; + } + + snprintf(cpu_model, 32, "/cpus/%s@%x", spapr->cpu_model, + env->cpu_index); + + offset = fdt_path_offset(fdt, cpu_model); + if (offset < 0) { + return offset; + } + + ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity, + sizeof(associativity)); + if (ret < 0) { + return ret; + } + } + return ret; +} + static void *spapr_create_fdt_skel(const char *cpu_model, target_phys_addr_t rma_size, target_phys_addr_t initrd_base, target_phys_addr_t initrd_size, + target_phys_addr_t kernel_size, const char *boot_device, const char *kernel_cmdline, long hash_shift) { void *fdt; - CPUState *env; - uint64_t mem_reg_property_rma[] = { 0, cpu_to_be64(rma_size) }; - uint64_t mem_reg_property_nonrma[] = { cpu_to_be64(rma_size), - cpu_to_be64(ram_size - rma_size) }; + CPUPPCState *env; + uint64_t mem_reg_property[2]; uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)}; @@ -119,6 +167,13 @@ static void *spapr_create_fdt_skel(const char *cpu_model, int i; char *modelname; int smt = kvmppc_smt_threads(); + unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; + uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; + uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0), + cpu_to_be32(0x0), cpu_to_be32(0x0), + cpu_to_be32(0x0)}; + char mem_name[32]; + target_phys_addr_t node0_size, mem_start; #define _FDT(exp) \ do { \ @@ -133,6 +188,12 @@ static void *spapr_create_fdt_skel(const char *cpu_model, fdt = g_malloc0(FDT_MAX_SIZE); _FDT((fdt_create(fdt, FDT_MAX_SIZE))); + if (kernel_size) { + _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size))); + } + if (initrd_size) { + _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size))); + } _FDT((fdt_finish_reservemap(fdt))); /* Root node */ @@ -146,40 +207,71 @@ static void *spapr_create_fdt_skel(const char *cpu_model, /* /chosen */ _FDT((fdt_begin_node(fdt, "chosen"))); + /* Set Form1_affinity */ + _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5)))); + _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline))); _FDT((fdt_property(fdt, "linux,initrd-start", &start_prop, sizeof(start_prop)))); _FDT((fdt_property(fdt, "linux,initrd-end", &end_prop, sizeof(end_prop)))); - _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); + if (kernel_size) { + uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), + cpu_to_be64(kernel_size) }; - /* - * Because we don't always invoke any firmware, we can't rely on - * that to do BAR allocation. Long term, we should probably do - * that ourselves, but for now, this setting (plus advertising the - * current BARs as 0) causes sufficiently recent kernels to to the - * BAR assignment themselves */ - _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0))); + _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop)))); + } + _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); _FDT((fdt_end_node(fdt))); /* memory node(s) */ - _FDT((fdt_begin_node(fdt, "memory@0"))); + node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size; + if (rma_size > node0_size) { + rma_size = node0_size; + } + /* RMA */ + mem_reg_property[0] = 0; + mem_reg_property[1] = cpu_to_be64(rma_size); + _FDT((fdt_begin_node(fdt, "memory@0"))); _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", mem_reg_property_rma, - sizeof(mem_reg_property_rma)))); + _FDT((fdt_property(fdt, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_property(fdt, "ibm,associativity", associativity, + sizeof(associativity)))); _FDT((fdt_end_node(fdt))); - if (ram_size > rma_size) { - char mem_name[32]; + /* RAM: Node 0 */ + if (node0_size > rma_size) { + mem_reg_property[0] = cpu_to_be64(rma_size); + mem_reg_property[1] = cpu_to_be64(node0_size - rma_size); + + sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size); + _FDT((fdt_begin_node(fdt, mem_name))); + _FDT((fdt_property_string(fdt, "device_type", "memory"))); + _FDT((fdt_property(fdt, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_property(fdt, "ibm,associativity", associativity, + sizeof(associativity)))); + _FDT((fdt_end_node(fdt))); + } - sprintf(mem_name, "memory@%" PRIx64, (uint64_t)rma_size); + /* RAM: Node 1 and beyond */ + mem_start = node0_size; + for (i = 1; i < nb_numa_nodes; i++) { + mem_reg_property[0] = cpu_to_be64(mem_start); + mem_reg_property[1] = cpu_to_be64(node_mem[i]); + associativity[3] = associativity[4] = cpu_to_be32(i); + sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start); _FDT((fdt_begin_node(fdt, mem_name))); _FDT((fdt_property_string(fdt, "device_type", "memory"))); - _FDT((fdt_property(fdt, "reg", mem_reg_property_nonrma, - sizeof(mem_reg_property_nonrma)))); + _FDT((fdt_property(fdt, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_property(fdt, "ibm,associativity", associativity, + sizeof(associativity)))); _FDT((fdt_end_node(fdt))); + mem_start += node_mem[i]; } /* cpus */ @@ -194,6 +286,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, modelname[i] = toupper(modelname[i]); } + /* This is needed during FDT finalization */ + spapr->cpu_model = g_strdup(modelname); + for (env = first_cpu; env != NULL; env = env->next_cpu) { int index = env->cpu_index; uint32_t servers_prop[smp_threads]; @@ -280,6 +375,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop, sizeof(hypertas_prop)))); + _FDT((fdt_property(fdt, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints)))); + _FDT((fdt_end_node(fdt))); /* interrupt controller */ @@ -351,10 +449,24 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); } + /* Advertise NUMA via ibm,associativity */ + if (nb_numa_nodes > 1) { + ret = spapr_set_associativity(fdt, spapr); + if (ret < 0) { + fprintf(stderr, "Couldn't set up NUMA device tree properties\n"); + } + } + spapr_populate_chosen_stdout(fdt, spapr->vio_bus); _FDT((fdt_pack(fdt))); + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { + hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n", + fdt_totalsize(fdt), FDT_MAX_SIZE); + exit(1); + } + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); g_free(fdt); @@ -365,7 +477,7 @@ static uint64_t translate_kernel_address(void *opaque, uint64_t addr) return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; } -static void emulate_spapr_hypercall(CPUState *env) +static void emulate_spapr_hypercall(CPUPPCState *env) { env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]); } @@ -391,6 +503,13 @@ static void spapr_reset(void *opaque) } +static void spapr_cpu_reset(void *opaque) +{ + CPUPPCState *env = opaque; + + cpu_state_reset(env); +} + /* pSeries LPAR / sPAPR hardware init */ static void ppc_spapr_init(ram_addr_t ram_size, const char *boot_device, @@ -399,13 +518,14 @@ static void ppc_spapr_init(ram_addr_t ram_size, const char *initrd_filename, const char *cpu_model) { - CPUState *env; + CPUPPCState *env; int i; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); target_phys_addr_t rma_alloc_size, rma_size; - uint32_t initrd_base; - long kernel_size, initrd_size, fw_size; + uint32_t initrd_base = 0; + long kernel_size = 0, initrd_size = 0; + long load_limit, rtas_limit, fw_size; long pteg_shift = 17; char *filename; @@ -427,11 +547,13 @@ static void ppc_spapr_init(ram_addr_t ram_size, rma_size = ram_size; } - /* We place the device tree just below either the top of the RMA, + /* We place the device tree and RTAS just below either the top of the RMA, * or just below 2GB, whichever is lowere, so that it can be * processed with 32-bit real mode code if necessary */ - spapr->fdt_addr = MIN(rma_size, 0x80000000) - FDT_MAX_SIZE; - spapr->rtas_addr = spapr->fdt_addr - RTAS_MAX_SIZE; + rtas_limit = MIN(rma_size, 0x80000000); + spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; + spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; + load_limit = spapr->fdt_addr - FW_OVERHEAD; /* init CPUs */ if (cpu_model == NULL) { @@ -446,7 +568,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, } /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); - qemu_register_reset((QEMUResetHandler *)&cpu_reset, env); + qemu_register_reset(spapr_cpu_reset, env); env->hreset_vector = 0x60; env->hreset_excp_prefix = 0; @@ -459,7 +581,8 @@ static void ppc_spapr_init(ram_addr_t ram_size, ram_addr_t nonrma_base = rma_alloc_size; ram_addr_t nonrma_size = spapr->ram_limit - rma_alloc_size; - memory_region_init_ram(ram, NULL, "ppc_spapr.ram", nonrma_size); + memory_region_init_ram(ram, "ppc_spapr.ram", nonrma_size); + vmstate_register_ram_global(ram); memory_region_add_subregion(sysmem, nonrma_base, ram); } @@ -486,13 +609,19 @@ static void ppc_spapr_init(ram_addr_t ram_size, filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin"); spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr, - ram_size - spapr->rtas_addr); + rtas_limit - spapr->rtas_addr); if (spapr->rtas_size < 0) { hw_error("qemu: could not load LPAR rtas '%s'\n", filename); exit(1); } + if (spapr->rtas_size > RTAS_MAX_SIZE) { + hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n", + spapr->rtas_size, RTAS_MAX_SIZE); + exit(1); + } g_free(filename); + /* Set up Interrupt Controller */ spapr->icp = xics_system_init(XICS_IRQS); spapr->next_irq = 16; @@ -502,8 +631,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, for (i = 0; i < MAX_SERIAL_PORTS; i++) { if (serial_hds[i]) { - spapr_vty_create(spapr->vio_bus, SPAPR_VTY_BASE_ADDRESS + i, - serial_hds[i]); + spapr_vty_create(spapr->vio_bus, serial_hds[i]); } } @@ -521,16 +649,30 @@ static void ppc_spapr_init(ram_addr_t ram_size, } if (strcmp(nd->model, "ibmveth") == 0) { - spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd); + spapr_vlan_create(spapr->vio_bus, nd); } else { pci_nic_init_nofail(&nd_table[i], nd->model, NULL); } } for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) { - spapr_vscsi_create(spapr->vio_bus, 0x2000 + i); + spapr_vscsi_create(spapr->vio_bus); } + if (rma_size < (MIN_RMA_SLOF << 20)) { + fprintf(stderr, "qemu: pSeries SLOF firmware requires >= " + "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF); + exit(1); + } + + fprintf(stderr, "sPAPR memory map:\n"); + fprintf(stderr, "RTAS : 0x%08lx..%08lx\n", + (unsigned long)spapr->rtas_addr, + (unsigned long)(spapr->rtas_addr + spapr->rtas_size - 1)); + fprintf(stderr, "FDT : 0x%08lx..%08lx\n", + (unsigned long)spapr->fdt_addr, + (unsigned long)(spapr->fdt_addr + FDT_MAX_SIZE - 1)); + if (kernel_filename) { uint64_t lowaddr = 0; @@ -539,57 +681,60 @@ static void ppc_spapr_init(ram_addr_t ram_size, if (kernel_size < 0) { kernel_size = load_image_targphys(kernel_filename, KERNEL_LOAD_ADDR, - ram_size - KERNEL_LOAD_ADDR); + load_limit - KERNEL_LOAD_ADDR); } if (kernel_size < 0) { fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); exit(1); } + fprintf(stderr, "Kernel : 0x%08x..%08lx\n", + KERNEL_LOAD_ADDR, KERNEL_LOAD_ADDR + kernel_size - 1); /* load initrd */ if (initrd_filename) { - initrd_base = INITRD_LOAD_ADDR; + /* Try to locate the initrd in the gap between the kernel + * and the firmware. Add a bit of space just in case + */ + initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff; initrd_size = load_image_targphys(initrd_filename, initrd_base, - ram_size - initrd_base); + load_limit - initrd_base); if (initrd_size < 0) { fprintf(stderr, "qemu: could not load initial ram disk '%s'\n", initrd_filename); exit(1); } + fprintf(stderr, "Ramdisk : 0x%08lx..%08lx\n", + (long)initrd_base, (long)(initrd_base + initrd_size - 1)); } else { initrd_base = 0; initrd_size = 0; } + } - spapr->entry_point = KERNEL_LOAD_ADDR; - } else { - if (rma_size < (MIN_RMA_SLOF << 20)) { - fprintf(stderr, "qemu: pSeries SLOF firmware requires >= " - "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF); - exit(1); - } - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME); - fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); - if (fw_size < 0) { - hw_error("qemu: could not load LPAR rtas '%s'\n", filename); - exit(1); - } - g_free(filename); - spapr->entry_point = 0x100; - initrd_base = 0; - initrd_size = 0; - - /* SLOF will startup the secondary CPUs using RTAS, - rather than expecting a kexec() style entry */ - for (env = first_cpu; env != NULL; env = env->next_cpu) { - env->halted = 1; - } + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, FW_FILE_NAME); + fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); + if (fw_size < 0) { + hw_error("qemu: could not load LPAR rtas '%s'\n", filename); + exit(1); + } + g_free(filename); + fprintf(stderr, "Firmware load : 0x%08x..%08lx\n", + 0, fw_size); + fprintf(stderr, "Firmware runtime : 0x%08lx..%08lx\n", + load_limit, (unsigned long)spapr->fdt_addr); + + spapr->entry_point = 0x100; + + /* SLOF will startup the secondary CPUs using RTAS */ + for (env = first_cpu; env != NULL; env = env->next_cpu) { + env->halted = 1; } /* Prepare the device tree */ spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size, initrd_base, initrd_size, + kernel_size, boot_device, kernel_cmdline, pteg_shift + 7); assert(spapr->fdt_skel != NULL); @@ -602,7 +747,6 @@ static QEMUMachine spapr_machine = { .desc = "pSeries Logical Partition (PAPR compliant)", .init = ppc_spapr_init, .max_cpus = MAX_CPUS, - .no_vga = 1, .no_parallel = 1, .use_scsi = 1, }; |