diff options
author | Yonghee Han <onstudy@samsung.com> | 2016-07-27 16:39:12 +0900 |
---|---|---|
committer | Yonghee Han <onstudy@samsung.com> | 2016-07-27 16:47:03 +0900 |
commit | a3b133b0ea0696e42fd876b9a803e28bc6ef5299 (patch) | |
tree | 68d7537fb9ede28b2e4d2b9f44eb70988279b8ba /numa.c | |
parent | 0d6a2f7e595218b5632ba7005128470e65138951 (diff) | |
download | qemu-a3b133b0ea0696e42fd876b9a803e28bc6ef5299.tar.gz qemu-a3b133b0ea0696e42fd876b9a803e28bc6ef5299.tar.bz2 qemu-a3b133b0ea0696e42fd876b9a803e28bc6ef5299.zip |
Imported Upstream version 2.3.1 (tag: upstream/2.3.1)
Change-Id: I2161522ea1d7ff10cd1d697609d473243c05e1df
Diffstat (limited to 'numa.c')
-rw-r--r-- | numa.c | 105 |
1 file changed, 86 insertions(+), 19 deletions(-)
@@ -22,7 +22,7 @@ * THE SOFTWARE. */ -#include "sysemu/sysemu.h" +#include "sysemu/numa.h" #include "exec/cpu-common.h" #include "qemu/bitmap.h" #include "qom/cpu.h" @@ -36,6 +36,8 @@ #include "sysemu/hostmem.h" #include "qmp-commands.h" #include "hw/mem/pc-dimm.h" +#include "qemu/option.h" +#include "qemu/config-file.h" QemuOptsList qemu_numa_opts = { .name = "numa", @@ -45,6 +47,11 @@ QemuOptsList qemu_numa_opts = { }; static int have_memdevs = -1; +static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. + * For all nodes, nodeid < max_numa_nodeid + */ +int nb_numa_nodes; +NodeInfo numa_info[MAX_NODES]; static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) { @@ -59,7 +66,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) if (nodenr >= MAX_NODES) { error_setg(errp, "Max number of NUMA nodes reached: %" - PRIu16 "\n", nodenr); + PRIu16 "", nodenr); return; } @@ -69,16 +76,18 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) } for (cpus = node->cpus; cpus; cpus = cpus->next) { - if (cpus->value > MAX_CPUMASK_BITS) { - error_setg(errp, "CPU number %" PRIu16 " is bigger than %d", - cpus->value, MAX_CPUMASK_BITS); + if (cpus->value >= max_cpus) { + error_setg(errp, + "CPU index (%" PRIu16 ")" + " should be smaller than maxcpus (%d)", + cpus->value, max_cpus); return; } bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); } if (node->has_mem && node->has_memdev) { - error_setg(errp, "qemu: cannot specify both mem= and memdev=\n"); + error_setg(errp, "qemu: cannot specify both mem= and memdev="); return; } @@ -87,7 +96,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) } if (node->has_memdev != have_memdevs) { error_setg(errp, "qemu: memdev option must be specified for either " - "all or no nodes\n"); + "all or no nodes"); return; } @@ -116,7 +125,7 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, 
Error **errp) max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); } -int numa_init_func(QemuOpts *opts, void *opaque) +static int parse_numa(QemuOpts *opts, void *opaque) { NumaOptions *object = NULL; Error *err = NULL; @@ -146,8 +155,7 @@ int numa_init_func(QemuOpts *opts, void *opaque) return 0; error: - qerror_report_err(err); - error_free(err); + error_report_err(err); if (object) { QapiDeallocVisitor *dv = qapi_dealloc_visitor_new(); @@ -159,9 +167,59 @@ error: return -1; } -void set_numa_nodes(void) +static char *enumerate_cpus(unsigned long *cpus, int max_cpus) +{ + int cpu; + bool first = true; + GString *s = g_string_new(NULL); + + for (cpu = find_first_bit(cpus, max_cpus); + cpu < max_cpus; + cpu = find_next_bit(cpus, max_cpus, cpu + 1)) { + g_string_append_printf(s, "%s%d", first ? "" : " ", cpu); + first = false; + } + return g_string_free(s, FALSE); +} + +static void validate_numa_cpus(void) { int i; + DECLARE_BITMAP(seen_cpus, MAX_CPUMASK_BITS); + + bitmap_zero(seen_cpus, MAX_CPUMASK_BITS); + for (i = 0; i < nb_numa_nodes; i++) { + if (bitmap_intersects(seen_cpus, numa_info[i].node_cpu, + MAX_CPUMASK_BITS)) { + bitmap_and(seen_cpus, seen_cpus, + numa_info[i].node_cpu, MAX_CPUMASK_BITS); + error_report("CPU(s) present in multiple NUMA nodes: %s", + enumerate_cpus(seen_cpus, max_cpus));; + exit(EXIT_FAILURE); + } + bitmap_or(seen_cpus, seen_cpus, + numa_info[i].node_cpu, MAX_CPUMASK_BITS); + } + + if (!bitmap_full(seen_cpus, max_cpus)) { + char *msg; + bitmap_complement(seen_cpus, seen_cpus, max_cpus); + msg = enumerate_cpus(seen_cpus, max_cpus); + error_report("warning: CPU(s) not present in any NUMA nodes: %s", msg); + error_report("warning: All CPU(s) up to maxcpus should be described " + "in NUMA config"); + g_free(msg); + } +} + +void parse_numa_opts(MachineClass *mc) +{ + int i; + + if (qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, + NULL, 1) != 0) { + exit(1); + } assert(max_numa_nodeid <= MAX_NODES); @@ -222,19 +280,29 @@ void 
set_numa_nodes(void) break; } } - /* assigning the VCPUs round-robin is easier to implement, guest OSes - * must cope with this anyway, because there are BIOSes out there in - * real machines which also use this scheme. + /* Historically VCPUs were assigned in round-robin order to NUMA + * nodes. However it causes issues with guest not handling it nice + * in case where cores/threads from a multicore CPU appear on + * different nodes. So allow boards to override default distribution + * rule grouping VCPUs by socket so that VCPUs from the same socket + * would be on the same node. */ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - set_bit(i, numa_info[i % nb_numa_nodes].node_cpu); + unsigned node_id = i % nb_numa_nodes; + if (mc->cpu_index_to_socket_id) { + node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes; + } + + set_bit(i, numa_info[node_id].node_cpu); } } + + validate_numa_cpus(); } } -void set_numa_modes(void) +void numa_post_machine_init(void) { CPUState *cpu; int i; @@ -262,8 +330,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, * regular RAM allocation. */ if (err) { - qerror_report_err(err); - error_free(err); + error_report_err(err); memory_region_init_ram(mr, owner, name, ram_size, &error_abort); } #else @@ -298,7 +365,7 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, } MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err); if (local_err) { - qerror_report_err(local_err); + error_report_err(local_err); exit(1); } |