diff options
51 files changed, 802 insertions, 980 deletions
diff --git a/.gitignore b/.gitignore index c91d018c78..c88ebf9598 100644 --- a/.gitignore +++ b/.gitignore @@ -39,9 +39,7 @@ /qmp-introspect.[ch] /qmp-marshal.c /qemu-doc.html -/qemu-tech.html /qemu-doc.info -/qemu-tech.info /qemu-img /qemu-nbd /qemu-options.def diff --git a/.travis.yml b/.travis.yml index f30b10e4f7..9916178bf3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ cache: ccache addons: apt: packages: + # Build dependencies - libaio-dev - libattr1-dev - libbrlapi-dev @@ -89,6 +90,7 @@ matrix: - env: CONFIG="" os: osx compiler: clang + # Plain Trusty Build - env: CONFIG="" sudo: required addons: @@ -99,3 +101,46 @@ matrix: - sudo apt-get build-dep -qq qemu - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ - git submodule update --init --recursive + # Using newer GCC with sanitizers + - addons: + apt: + sources: + # PPAs for newer toolchains + - ubuntu-toolchain-r-test + packages: + # Extra toolchains + - gcc-5 + - g++-5 + # Build dependencies + - libaio-dev + - libattr1-dev + - libbrlapi-dev + - libcap-ng-dev + - libgnutls-dev + - libgtk-3-dev + - libiscsi-dev + - liblttng-ust-dev + - libnfs-dev + - libncurses5-dev + - libnss3-dev + - libpixman-1-dev + - libpng12-dev + - librados-dev + - libsdl1.2-dev + - libseccomp-dev + - libspice-protocol-dev + - libspice-server-dev + - libssh2-1-dev + - liburcu-dev + - libusb-1.0-0-dev + - libvte-2.90-dev + - sparse + - uuid-dev + language: generic + compiler: none + env: + - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5 + - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread" + - TEST_CMD="" + before_script: + - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log @@ -93,7 +93,7 @@ LIBS+=-lz $(LIBS_TOOLS) HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF) ifdef BUILD_DOCS -DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 +DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 ifdef CONFIG_VIRTFS DOCS+=fsdev/virtfs-proxy-helper.1 endif @@ -398,7 +398,6 @@ distclean: clean rm -f qemu-doc.vr rm -f config.log rm -f linux-headers/asm - rm -f qemu-tech.info qemu-tech.aux qemu-tech.cp qemu-tech.dvi qemu-tech.fn qemu-tech.info qemu-tech.ky qemu-tech.log qemu-tech.pdf qemu-tech.pg qemu-tech.toc qemu-tech.tp qemu-tech.vr for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -434,7 +433,7 @@ endif install-doc: $(DOCS) $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)" $(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -592,10 +591,10 @@ qemu-ga.8: qemu-ga.texi $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \ "GEN","$@") -dvi: qemu-doc.dvi qemu-tech.dvi -html: qemu-doc.html qemu-tech.html -info: qemu-doc.info qemu-tech.info -pdf: qemu-doc.pdf qemu-tech.pdf +dvi: qemu-doc.dvi +html: qemu-doc.html +info: qemu-doc.info +pdf: qemu-doc.pdf qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ @@ -42,8 +42,6 @@ of other UNIX targets. The simple steps to build QEMU are: ../configure make -Complete details of the process for building and configuring QEMU for -all supported host platforms can be found in the qemu-tech.html file. Additional information can also be found online via the QEMU website: http://qemu-project.org/Hosts/Linux diff --git a/cpu-exec.c b/cpu-exec.c index 8823d23df7..e114fcdf29 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -192,7 +192,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) /* We were asked to stop executing TBs (probably a pending * interrupt. We've now stopped, so clear the flag. */ - cpu->tcg_exit_req = 0; + atomic_set(&cpu->tcg_exit_req, 0); } return ret; } @@ -490,8 +490,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu, *last_tb = NULL; } } - if (unlikely(cpu->exit_request || replay_has_interrupt())) { - cpu->exit_request = 0; + if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) { + atomic_set(&cpu->exit_request, 0); cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } @@ -503,7 +503,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, { uintptr_t ret; - if (unlikely(cpu->exit_request)) { + if (unlikely(atomic_read(&cpu->exit_request))) { return; } diff --git a/docs/specs/edu.txt b/docs/specs/edu.txt index 7f8146780b..0876310809 100644 --- a/docs/specs/edu.txt +++ b/docs/specs/edu.txt @@ -52,7 +52,7 @@ size == 8 for the rest. 0x20 (RW) : status register, bitwise OR 0x01 -- computing factorial (RO) - 0x80 -- raise interrupt 0x01 after finishing factorial computation + 0x80 -- raise interrupt after finishing factorial computation 0x24 (RO) : interrupt status register It contains values which raised the interrupt (see interrupt raise @@ -87,6 +87,11 @@ An IRQ is generated when written to the interrupt raise register. The value appears in interrupt status register when the interrupt is raised and has to be written to the interrupt acknowledge register to lower it. +The device supports both INTx and MSI interrupt. By default, INTx is +used. Even if the driver disabled INTx and only uses MSI, it still +needs to update the acknowledge register at the end of the IRQ handler +routine. + DMA controller -------------- One has to specify, source, destination, size, and start the transfer. One diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 19729e55ae..55d50c42c6 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -172,20 +172,12 @@ STEXI Show the command line history. ETEXI -#if defined(TARGET_I386) || defined(TARGET_PPC) || defined(TARGET_MIPS) || \ - defined(TARGET_LM32) || (defined(TARGET_SPARC) && !defined(TARGET_SPARC64)) { .name = "irq", .args_type = "", .params = "", .help = "show the interrupts statistics (if available)", -#ifdef TARGET_SPARC - .cmd = sun4m_hmp_info_irq, -#elif defined(TARGET_LM32) - .cmd = lm32_hmp_info_irq, -#else .cmd = hmp_info_irq, -#endif }, STEXI @@ -198,16 +190,9 @@ ETEXI .name = "pic", .args_type = "", .params = "", - .help = "show i8259 (PIC) state", -#ifdef TARGET_SPARC - .cmd = sun4m_hmp_info_pic, -#elif defined(TARGET_LM32) - .cmd = lm32_hmp_info_pic, -#else + .help = "show PIC state", .cmd = hmp_info_pic, -#endif }, -#endif STEXI @item info pic @@ -36,6 +36,7 @@ #include "qemu-io.h" #include "qemu/cutils.h" #include "qemu/error-report.h" +#include "hw/intc/intc.h" #ifdef CONFIG_SPICE #include <spice/enums.h> @@ -787,6 +788,70 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev) } } +static int hmp_info_irq_foreach(Object *obj, void *opaque) +{ + InterruptStatsProvider *intc; + InterruptStatsProviderClass *k; + Monitor *mon = opaque; + + if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) { + intc = INTERRUPT_STATS_PROVIDER(obj); + k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj); + uint64_t *irq_counts; + unsigned int nb_irqs, i; + if (k->get_statistics && + k->get_statistics(intc, &irq_counts, &nb_irqs)) { + if (nb_irqs > 0) { + monitor_printf(mon, "IRQ statistics for %s:\n", + object_get_typename(obj)); + for (i = 0; i < nb_irqs; i++) { + if (irq_counts[i] > 0) { + monitor_printf(mon, "%2d: %" PRId64 "\n", i, + irq_counts[i]); + } + } + } + } else { + monitor_printf(mon, "IRQ statistics not available for %s.\n", + object_get_typename(obj)); + } + } + + return 0; +} + +void hmp_info_irq(Monitor *mon, const QDict *qdict) +{ + object_child_foreach_recursive(object_get_root(), + hmp_info_irq_foreach, mon); +} + +static int hmp_info_pic_foreach(Object *obj, void *opaque) +{ + InterruptStatsProvider *intc; + InterruptStatsProviderClass *k; + Monitor *mon = opaque; + + if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) { + intc = INTERRUPT_STATS_PROVIDER(obj); + k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj); + if (k->print_info) { + k->print_info(intc, mon); + } else { + monitor_printf(mon, "Interrupt controller information not available for %s.\n", + object_get_typename(obj)); + } + } + + return 0; +} + +void hmp_info_pic(Monitor *mon, const QDict *qdict) +{ + object_child_foreach_recursive(object_get_root(), + hmp_info_pic_foreach, mon); +} + void hmp_info_pci(Monitor *mon, const QDict *qdict) { PciInfoList *info_list, *info; @@ -36,6 +36,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict); void hmp_info_vnc(Monitor *mon, const QDict *qdict); void hmp_info_spice(Monitor *mon, const QDict *qdict); void hmp_info_balloon(Monitor *mon, const QDict *qdict); +void hmp_info_irq(Monitor *mon, const QDict *qdict); +void hmp_info_pic(Monitor *mon, const QDict *qdict); void hmp_info_pci(Monitor *mon, const QDict *qdict); void hmp_info_block_jobs(Monitor *mon, const QDict *qdict); void hmp_info_tpm(Monitor *mon, const QDict *qdict); diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 023de526f6..47b79d9112 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -143,10 +143,10 @@ static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val) static void amdvi_generate_msi_interrupt(AMDVIState *s) { - MSIMessage msg; - MemTxAttrs attrs; - - attrs.requester_id = pci_requester_id(&s->pci.dev); + MSIMessage msg = {}; + MemTxAttrs attrs = { + .requester_id = pci_requester_id(&s->pci.dev) + }; if (msi_enabled(&s->pci.dev)) { msg = msi_get_message(&s->pci.dev, 0); @@ -185,7 +185,7 @@ static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, int length) { int index = start / 64, bitpos = start % 64; - uint64_t mask = ((1 << length) - 1) << bitpos; + uint64_t mask = MAKE_64BIT_MASK(start, length); buffer[index] &= ~mask; buffer[index] |= (value << bitpos) & mask; } @@ -333,8 +333,8 @@ static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, uint64_t gpa, IOMMUTLBEntry to_cache, uint16_t domid) { - AMDVIIOTLBEntry *entry = g_malloc(sizeof(*entry)); - uint64_t *key = g_malloc(sizeof(key)); + AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); + uint64_t *key = g_new(uint64_t, 1); uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; /* don't cache erroneous translations */ @@ -1135,6 +1135,7 @@ static void amdvi_reset(DeviceState *dev) static void amdvi_realize(DeviceState *dev, Error **err) { + int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; @@ -1147,8 +1148,11 @@ static void amdvi_realize(DeviceState *dev, Error **err) object_property_set_bool(OBJECT(&s->pci), true, "realized", err); s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0, AMDVI_CAPAB_SIZE); - pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE); - pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE); + assert(s->capab_offset > 0); + ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE); + assert(ret > 0); + ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE); + assert(ret > 0); /* set up MMIO */ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 9cca2800d3..2f44a2da26 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -18,6 +18,7 @@ common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_dist.o common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_redist.o common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_its_common.o common-obj-$(CONFIG_OPENPIC) += openpic.o +common-obj-y += intc.o obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index c2607a5868..fe9ecd6bd4 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -29,6 +29,7 @@ #include "qemu/timer.h" #include "qemu/log.h" #include "hw/isa/i8259_internal.h" +#include "hw/intc/intc.h" /* debug PIC */ //#define DEBUG_PIC @@ -251,6 +252,35 @@ static void pic_reset(DeviceState *dev) pic_init_reset(s); } +static bool pic_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs) +{ + PICCommonState *s = PIC_COMMON(obj); + + if (s->master) { +#ifdef DEBUG_IRQ_COUNT + *irq_counts = irq_count; + *nb_irqs = ARRAY_SIZE(irq_count); +#else + return false; +#endif + } else { + *irq_counts = NULL; + *nb_irqs = 0; + } + return true; +} + +static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + PICCommonState *s = PIC_COMMON(obj); + monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " + "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", + s->master ? 0 : 1, s->irr, s->imr, s->isr, s->priority_add, + s->irq_base, s->read_reg_select, s->elcr, + s->special_fully_nested_mode); +} + static void pic_ioport_write(void *opaque, hwaddr addr64, uint64_t val64, unsigned size) { @@ -431,42 +461,6 @@ static void pic_realize(DeviceState *dev, Error **errp) pc->parent_realize(dev, errp); } -void hmp_info_pic(Monitor *mon, const QDict *qdict) -{ - int i; - PICCommonState *s; - - if (!isa_pic) { - return; - } - for (i = 0; i < 2; i++) { - s = i == 0 ? PIC_COMMON(isa_pic) : slave_pic; - monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " - "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", - i, s->irr, s->imr, s->isr, s->priority_add, - s->irq_base, s->read_reg_select, s->elcr, - s->special_fully_nested_mode); - } -} - -void hmp_info_irq(Monitor *mon, const QDict *qdict) -{ -#ifndef DEBUG_IRQ_COUNT - monitor_printf(mon, "irq statistic code not compiled.\n"); -#else - int i; - int64_t count; - - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 16; i++) { - count = irq_count[i]; - if (count > 0) { - monitor_printf(mon, "%2d: %" PRId64 "\n", i, count); - } - } -#endif -} - qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq) { qemu_irq *irq_set; @@ -503,10 +497,13 @@ static void i8259_class_init(ObjectClass *klass, void *data) { PICClass *k = PIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); k->parent_realize = dc->realize; dc->realize = pic_realize; dc->reset = pic_reset; + ic->get_statistics = pic_get_statistics; + ic->print_info = pic_print_info; } static const TypeInfo i8259_info = { @@ -515,6 +512,10 @@ static const TypeInfo i8259_info = { .parent = TYPE_PIC_COMMON, .class_init = i8259_class_init, .class_size = sizeof(PICClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void pic_register_types(void) diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index 3a850b0c66..d9a5e8b217 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -70,10 +70,11 @@ static int pic_dispatch_post_load(void *opaque, int version_id) static void pic_common_realize(DeviceState *dev, Error **errp) { PICCommonState *s = PIC_COMMON(dev); + ISADevice *isa = ISA_DEVICE(dev); - isa_register_ioport(NULL, &s->base_io, s->iobase); + isa_register_ioport(isa, &s->base_io, s->iobase); if (s->elcr_addr != -1) { - isa_register_ioport(NULL, &s->elcr_io, s->elcr_addr); + isa_register_ioport(isa, &s->elcr_io, s->elcr_addr); } qdev_set_legacy_instance_id(dev, s->iobase, 1); diff --git a/hw/intc/intc.c b/hw/intc/intc.c new file mode 100644 index 0000000000..2e1e29e753 --- /dev/null +++ b/hw/intc/intc.c @@ -0,0 +1,41 @@ +/* + * QEMU Generic Interrupt Controller + * + * Copyright (c) 2016 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/intc/intc.h" +#include "qemu/module.h" + +static const TypeInfo intctrl_info = { + .name = TYPE_INTERRUPT_STATS_PROVIDER, + .parent = TYPE_INTERFACE, + .class_size = sizeof(InterruptStatsProviderClass), +}; + +static void intc_register_types(void) +{ + type_register_static(&intctrl_info); +} + +type_init(intc_register_types) + diff --git a/hw/intc/lm32_pic.c b/hw/intc/lm32_pic.c index 3dad01c5ba..09e15115fb 100644 --- a/hw/intc/lm32_pic.c +++ b/hw/intc/lm32_pic.c @@ -25,6 +25,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/lm32/lm32_pic.h" +#include "hw/intc/intc.h" #define TYPE_LM32_PIC "lm32-pic" #define LM32_PIC(obj) OBJECT_CHECK(LM32PicState, (obj), TYPE_LM32_PIC) @@ -38,39 +39,10 @@ struct LM32PicState { uint32_t irq_state; /* statistics */ - uint32_t stats_irq_count[32]; + uint64_t stats_irq_count[32]; }; typedef struct LM32PicState LM32PicState; -static LM32PicState *pic; -void lm32_hmp_info_pic(Monitor *mon, const QDict *qdict) -{ - if (pic == NULL) { - return; - } - - monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", - pic->im, pic->ip, pic->irq_state); -} - -void lm32_hmp_info_irq(Monitor *mon, const QDict *qdict) -{ - int i; - uint32_t count; - - if (pic == NULL) { - return; - } - - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 32; i++) { - count = pic->stats_irq_count[i]; - if (count > 0) { - monitor_printf(mon, "%2d: %u\n", i, count); - } - } -} - static void update_irq(LM32PicState *s) { s->ip |= s->irq_state; @@ -152,6 +124,22 @@ static void pic_reset(DeviceState *d) } } +static bool lm32_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs) +{ + LM32PicState *s = LM32_PIC(obj); + *irq_counts = s->stats_irq_count; + *nb_irqs = ARRAY_SIZE(s->stats_irq_count); + return true; +} + +static void lm32_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + LM32PicState *s = LM32_PIC(obj); + monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", + s->im, s->ip, s->irq_state); +} + static void lm32_pic_init(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -160,19 +148,17 @@ static void lm32_pic_init(Object *obj) qdev_init_gpio_in(dev, irq_handler, 32); sysbus_init_irq(sbd, &s->parent_irq); - - pic = s; } static const VMStateDescription vmstate_lm32_pic = { .name = "lm32-pic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { VMSTATE_UINT32(im, LM32PicState), VMSTATE_UINT32(ip, LM32PicState), VMSTATE_UINT32(irq_state, LM32PicState), - VMSTATE_UINT32_ARRAY(stats_irq_count, LM32PicState, 32), + VMSTATE_UINT64_ARRAY(stats_irq_count, LM32PicState, 32), VMSTATE_END_OF_LIST() } }; @@ -180,9 +166,12 @@ static const VMStateDescription vmstate_lm32_pic = { static void lm32_pic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = pic_reset; dc->vmsd = &vmstate_lm32_pic; + ic->get_statistics = lm32_get_statistics; + ic->print_info = lm32_print_info; } static const TypeInfo lm32_pic_info = { @@ -191,6 +180,10 @@ static const TypeInfo lm32_pic_info = { .instance_size = sizeof(LM32PicState), .instance_init = lm32_pic_init, .class_init = lm32_pic_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void lm32_pic_register_types(void) diff --git a/hw/intc/slavio_intctl.c b/hw/intc/slavio_intctl.c index e82e893628..84e0bee4a9 100644 --- a/hw/intc/slavio_intctl.c +++ b/hw/intc/slavio_intctl.c @@ -26,6 +26,7 @@ #include "hw/sparc/sun4m.h" #include "monitor/monitor.h" #include "hw/sysbus.h" +#include "hw/intc/intc.h" #include "trace.h" //#define DEBUG_IRQ_COUNT @@ -210,38 +211,6 @@ static const MemoryRegionOps slavio_intctlm_mem_ops = { }, }; -void slavio_pic_info(Monitor *mon, DeviceState *dev) -{ - SLAVIO_INTCTLState *s = SLAVIO_INTCTL(dev); - int i; - - for (i = 0; i < MAX_CPUS; i++) { - monitor_printf(mon, "per-cpu %d: pending 0x%08x\n", i, - s->slaves[i].intreg_pending); - } - monitor_printf(mon, "master: pending 0x%08x, disabled 0x%08x\n", - s->intregm_pending, s->intregm_disabled); -} - -void slavio_irq_info(Monitor *mon, DeviceState *dev) -{ -#ifndef DEBUG_IRQ_COUNT - monitor_printf(mon, "irq statistic code not compiled.\n"); -#else - SLAVIO_INTCTLState *s = SLAVIO_INTCTL(dev); - int i; - int64_t count; - - s = SLAVIO_INTCTL(dev); - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 32; i++) { - count = s->irq_count[i]; - if (count > 0) - monitor_printf(mon, "%2d: %" PRId64 "\n", i, count); - } -#endif -} - static const uint32_t intbit_to_level[] = { 2, 3, 5, 7, 9, 11, 13, 2, 3, 5, 7, 9, 11, 13, 12, 12, 6, 13, 4, 10, 8, 9, 11, 0, 0, 0, 0, 15, 15, 15, 15, 0, @@ -418,6 +387,31 @@ static void slavio_intctl_reset(DeviceState *d) slavio_check_interrupts(s, 0); } +#ifdef DEBUG_IRQ_COUNT +static bool slavio_intctl_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, + unsigned int *nb_irqs) +{ + SLAVIO_INTCTLState *s = SLAVIO_INTCTL(obj); + *irq_counts = s->irq_count; + *nb_irqs = ARRAY_SIZE(s->irq_count); + return true; +} +#endif + +static void slavio_intctl_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + SLAVIO_INTCTLState *s = SLAVIO_INTCTL(obj); + int i; + + for (i = 0; i < MAX_CPUS; i++) { + monitor_printf(mon, "per-cpu %d: pending 0x%08x\n", i, + s->slaves[i].intreg_pending); + } + monitor_printf(mon, "master: pending 0x%08x, disabled 0x%08x\n", + s->intregm_pending, s->intregm_disabled); +} + static void slavio_intctl_init(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -449,9 +443,14 @@ static void slavio_intctl_init(Object *obj) static void slavio_intctl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = slavio_intctl_reset; dc->vmsd = &vmstate_intctl; +#ifdef DEBUG_IRQ_COUNT + ic->get_statistics = slavio_intctl_get_statistics; +#endif + ic->print_info = slavio_intctl_print_info; } static const TypeInfo slavio_intctl_info = { @@ -460,6 +459,10 @@ static const TypeInfo slavio_intctl_info = { .instance_size = sizeof(SLAVIO_INTCTLState), .instance_init = slavio_intctl_init, .class_init = slavio_intctl_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void slavio_intctl_register_types(void) diff --git a/hw/misc/edu.c b/hw/misc/edu.c index 888ba49a0e..401039c100 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" +#include "hw/pci/msi.h" #include "qemu/timer.h" #include "qemu/main-loop.h" /* iothread mutex */ #include "qapi/visitor.h" @@ -69,11 +70,20 @@ typedef struct { uint64_t dma_mask; } EduState; +static bool edu_msi_enabled(EduState *edu) +{ + return msi_enabled(&edu->pdev); +} + static void edu_raise_irq(EduState *edu, uint32_t val) { edu->irq_status |= val; if (edu->irq_status) { - pci_set_irq(&edu->pdev, 1); + if (edu_msi_enabled(edu)) { + msi_notify(&edu->pdev, 0); + } else { + pci_set_irq(&edu->pdev, 1); + } } } @@ -81,7 +91,7 @@ static void edu_lower_irq(EduState *edu, uint32_t val) { edu->irq_status &= ~val; - if (!edu->irq_status) { + if (!edu->irq_status && !edu_msi_enabled(edu)) { pci_set_irq(&edu->pdev, 0); } } @@ -342,6 +352,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) pci_config_set_interrupt_pin(pci_conf, 1); + if (msi_init(pdev, 0, 1, true, false, errp)) { + return; + } + memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu, "edu-mmio", 1 << 20); pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio); diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 478fda8209..b3915e4fd6 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -159,20 +159,6 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, } } -static DeviceState *slavio_intctl; - -void sun4m_hmp_info_pic(Monitor *mon, const QDict *qdict) -{ - if (slavio_intctl) - slavio_pic_info(mon, slavio_intctl); -} - -void sun4m_hmp_info_irq(Monitor *mon, const QDict *qdict) -{ - if (slavio_intctl) - slavio_irq_info(mon, slavio_intctl); -} - void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -873,6 +859,7 @@ static void dummy_fdc_tc(void *opaque, int irq, int level) static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, MachineState *machine) { + DeviceState *slavio_intctl; const char *cpu_model = machine->cpu_model; unsigned int i; void *iommu, *espdma, *ledma, *nvram; diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index ea625f25ce..da209d02f0 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -717,11 +717,18 @@ static void rtc_set_date_from_host(ISADevice *dev) rtc_set_cmos(s, &tm); } +static void rtc_pre_save(void *opaque) +{ + RTCState *s = opaque; + + rtc_update_time(s); +} + static int rtc_post_load(void *opaque, int version_id) { RTCState *s = opaque; - if (version_id <= 2) { + if (version_id <= 2 || rtc_clock == QEMU_CLOCK_REALTIME) { rtc_set_time(s); s->offset = 0; check_update_timer(s); @@ -764,6 +771,7 @@ static const VMStateDescription vmstate_rtc = { .name = "mc146818rtc", .version_id = 3, .minimum_version_id = 1, + .pre_save = rtc_pre_save, .post_load = rtc_post_load, .fields = (VMStateField[]) { VMSTATE_BUFFER(cmos_data, RTCState), diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index cb2df83b2c..b16c448249 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -181,8 +181,6 @@ qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq); qemu_irq *kvm_i8259_init(ISABus *bus); int pic_read_irq(DeviceState *d); int pic_get_output(DeviceState *d); -void hmp_info_pic(Monitor *mon, const QDict *qdict); -void hmp_info_irq(Monitor *mon, const QDict *qdict); /* ioapic.c */ diff --git a/include/hw/intc/intc.h b/include/hw/intc/intc.h new file mode 100644 index 0000000000..27d9828943 --- /dev/null +++ b/include/hw/intc/intc.h @@ -0,0 +1,33 @@ +#ifndef INTC_H +#define INTC_H + +#include "qom/object.h" + +#define TYPE_INTERRUPT_STATS_PROVIDER "intctrl" + +#define INTERRUPT_STATS_PROVIDER_CLASS(klass) \ + OBJECT_CLASS_CHECK(InterruptStatsProviderClass, (klass), \ + TYPE_INTERRUPT_STATS_PROVIDER) +#define INTERRUPT_STATS_PROVIDER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(InterruptStatsProviderClass, (obj), \ + TYPE_INTERRUPT_STATS_PROVIDER) +#define INTERRUPT_STATS_PROVIDER(obj) \ + INTERFACE_CHECK(InterruptStatsProvider, (obj), \ + TYPE_INTERRUPT_STATS_PROVIDER) + +typedef struct InterruptStatsProvider { + Object parent; +} InterruptStatsProvider; + +typedef struct InterruptStatsProviderClass { + InterfaceClass parent; + + /* The returned pointer and statistics must remain valid until + * the BQL is next dropped. + */ + bool (*get_statistics)(InterruptStatsProvider *obj, uint64_t **irq_counts, + unsigned int *nb_irqs); + void (*print_info)(InterruptStatsProvider *obj, Monitor *mon); +} InterruptStatsProviderClass; + +#endif diff --git a/include/hw/lm32/lm32_pic.h b/include/hw/lm32/lm32_pic.h index 189fa386f7..e6479b8f63 100644 --- a/include/hw/lm32/lm32_pic.h +++ b/include/hw/lm32/lm32_pic.h @@ -8,7 +8,4 @@ uint32_t lm32_pic_get_im(DeviceState *d); void lm32_pic_set_ip(DeviceState *d, uint32_t ip); void lm32_pic_set_im(DeviceState *d, uint32_t im); -void lm32_hmp_info_pic(Monitor *mon, const QDict *qdict); -void lm32_hmp_info_irq(Monitor *mon, const QDict *qdict); - #endif /* QEMU_HW_LM32_PIC_H */ diff --git a/include/hw/sparc/sun4m.h b/include/hw/sparc/sun4m.h index 9c17425a43..580d87b252 100644 --- a/include/hw/sparc/sun4m.h +++ b/include/hw/sparc/sun4m.h @@ -24,14 +24,6 @@ static inline void sparc_iommu_memory_write(void *opaque, sparc_iommu_memory_rw(opaque, addr, buf, len, 1); } -/* slavio_intctl.c */ -void slavio_pic_info(Monitor *mon, DeviceState *dev); -void slavio_irq_info(Monitor *mon, DeviceState *dev); - -/* sun4m.c */ -void sun4m_hmp_info_pic(Monitor *mon, const QDict *qdict); -void sun4m_hmp_info_irq(Monitor *mon, const QDict *qdict); - /* sparc32_dma.c */ #include "hw/sparc/sparc32_dma.h" diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 0cce246ea9..c4f6950fcb 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -82,7 +82,7 @@ */ #if defined(__SANITIZE_THREAD__) #define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) -#elsif defined(__alpha__) +#elif defined(__alpha__) #define smp_read_barrier_depends() asm volatile("mb":::"memory") #else #define smp_read_barrier_depends() barrier() @@ -92,6 +92,12 @@ /* Weak atomic operations prevent the compiler moving other * loads/stores past the atomic operation load/store. However there is * no explicit memory barrier for the processor. + * + * The C11 memory model says that variables that are accessed from + * different threads should at least be done with __ATOMIC_RELAXED + * primitives or the result is undefined. Generally this has little to + * no effect on the generated code but not using the atomic primitives + * will get flagged by sanitizers as a violation. */ #define atomic_read(ptr) \ ({ \ diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9e9fa61546..384bfe245f 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -388,6 +388,16 @@ void os_mem_prealloc(int fd, char *area, size_t sz, Error **errp); int qemu_read_password(char *buf, int buf_size); /** + * qemu_get_pid_name: + * @pid: pid of a process + * + * For given @pid fetch its name. Caller is responsible for + * freeing the string when no longer needed. + * Returns allocated string on success, NULL on failure. + */ +char *qemu_get_pid_name(pid_t pid); + +/** * qemu_fork: * * A version of fork that avoids signal handler race diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h index 2e2be4c4f0..8dee11d101 100644 --- a/include/qemu/seqlock.h +++ b/include/qemu/seqlock.h @@ -31,7 +31,7 @@ static inline void seqlock_init(QemuSeqLock *sl) /* Lock out other writers and update the count. */ static inline void seqlock_write_begin(QemuSeqLock *sl) { - ++sl->sequence; + atomic_set(&sl->sequence, sl->sequence + 1); /* Write sequence before updating other fields. */ smp_wmb(); @@ -42,7 +42,7 @@ static inline void seqlock_write_end(QemuSeqLock *sl) /* Write other fields before finalizing sequence. */ smp_wmb(); - ++sl->sequence; + atomic_set(&sl->sequence, sl->sequence + 1); } static inline unsigned seqlock_read_begin(QemuSeqLock *sl) diff --git a/include/sysemu/char.h b/include/sysemu/char.h index 0d0465ae0e..4593576cf7 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -92,6 +92,7 @@ struct CharDriverState { int explicit_be_open; int avail_connections; int is_mux; + int mux_idx; guint fd_in_tag; QemuOpts *opts; bool replay; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 0815f30965..fa559be47f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7476,13 +7476,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; } + cpu_list_lock(); + if (CPU_NEXT(first_cpu)) { TaskState *ts; - cpu_list_lock(); /* Remove the CPU from the list. */ QTAILQ_REMOVE(&cpus, cpu, node); + cpu_list_unlock(); + ts = cpu->opaque; if (ts->child_tidptr) { put_user_u32(0, ts->child_tidptr); @@ -7495,6 +7498,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rcu_unregister_thread(); pthread_exit(NULL); } + + cpu_list_unlock(); #ifdef TARGET_GPROF _mcleanup(); #endif @@ -81,12 +81,6 @@ #include "qemu/cutils.h" #include "qapi/qmp/dispatch.h" -/* for hmp_info_irq/pic */ -#if defined(TARGET_SPARC) -#include "hw/sparc/sun4m.h" -#endif -#include "hw/lm32/lm32_pic.h" - #if defined(TARGET_S390X) #include "hw/s390x/storage-keys.h" #endif diff --git a/qemu-char.c b/qemu-char.c index fb456cec34..4b330ea90f 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -165,6 +165,7 @@ CharDriverState *qemu_chr_alloc(ChardevCommon *backend, Error **errp) CharDriverState *chr = g_malloc0(sizeof(CharDriverState)); qemu_mutex_init(&chr->chr_write_lock); + chr->mux_idx = -1; if (backend->has_logfile) { int flags = O_WRONLY | O_CREAT; if (backend->has_logappend && @@ -468,7 +469,7 @@ void qemu_chr_add_handlers_full(CharDriverState *s, s->chr_read = fd_read; s->chr_event = fd_event; s->handler_opaque = opaque; - if (fe_open && s->chr_update_read_handler) { + if (s->chr_update_read_handler) { s->chr_update_read_handler(s, context); } @@ -738,17 +739,25 @@ static void mux_chr_update_read_handler(CharDriverState *chr, GMainContext *context) { MuxDriver *d = chr->opaque; + int idx; if (d->mux_cnt >= MAX_MUX) { fprintf(stderr, "Cannot add I/O handlers, MUX array is full\n"); return; } - d->ext_opaque[d->mux_cnt] = chr->handler_opaque; - d->chr_can_read[d->mux_cnt] = chr->chr_can_read; - d->chr_read[d->mux_cnt] = chr->chr_read; - d->chr_event[d->mux_cnt] = chr->chr_event; + + if (chr->mux_idx == -1) { + chr->mux_idx = d->mux_cnt++; + } + + idx = chr->mux_idx; + d->ext_opaque[idx] = chr->handler_opaque; + d->chr_can_read[idx] = chr->chr_can_read; + d->chr_read[idx] = chr->chr_read; + d->chr_event[idx] = chr->chr_event; + /* Fix up the real driver with mux routines */ - if (d->mux_cnt == 0) { + if (d->mux_cnt == 1) { qemu_chr_add_handlers_full(d->drv, mux_chr_can_read, mux_chr_read, mux_chr_event, @@ -757,8 +766,7 @@ static void mux_chr_update_read_handler(CharDriverState *chr, if (d->focus != -1) { mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); } - d->focus = d->mux_cnt; - d->mux_cnt++; + d->focus = idx; mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN); } diff --git a/qemu-doc.texi b/qemu-doc.texi index f37fd3130e..023c1406cc 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -32,11 +32,10 @@ @menu * Introduction:: -* Installation:: * QEMU PC System emulator:: * QEMU System emulator for non PC targets:: * QEMU User space emulator:: -* compilation:: Compilation from the sources +* Implementation notes:: * License:: * Index:: @end menu @@ -57,98 +56,69 @@ QEMU is a FAST! processor emulator using dynamic translation to achieve good emulation speed. +@cindex operating modes QEMU has two operating modes: @itemize -@cindex operating modes - -@item @cindex system emulation -Full system emulation. In this mode, QEMU emulates a full system (for +@item Full system emulation. In this mode, QEMU emulates a full system (for example a PC), including one or several processors and various peripherals. It can be used to launch different Operating Systems without rebooting the PC or to debug system code. -@item @cindex user mode emulation -User mode emulation. In this mode, QEMU can launch +@item User mode emulation. In this mode, QEMU can launch processes compiled for one CPU on another CPU. It can be used to launch the Wine Windows API emulator (@url{http://www.winehq.org}) or to ease cross-compilation and cross-debugging. @end itemize -QEMU can run without a host kernel driver and yet gives acceptable -performance. +QEMU has the following features: -For system emulation, the following hardware targets are supported: @itemize -@cindex emulated target systems -@cindex supported target systems -@item PC (x86 or x86_64 processor) -@item ISA PC (old style PC without PCI bus) -@item PREP (PowerPC processor) -@item G3 Beige PowerMac (PowerPC processor) -@item Mac99 PowerMac (PowerPC processor, in progress) -@item Sun4m/Sun4c/Sun4d (32-bit Sparc processor) -@item Sun4u/Sun4v (64-bit Sparc processor, in progress) -@item Malta board (32-bit and 64-bit MIPS processors) -@item MIPS Magnum (64-bit MIPS processor) -@item ARM Integrator/CP (ARM) -@item ARM Versatile baseboard (ARM) -@item ARM RealView Emulation/Platform baseboard (ARM) -@item Spitz, Akita, Borzoi, Terrier and Tosa PDAs (PXA270 processor) -@item Luminary Micro LM3S811EVB (ARM Cortex-M3) -@item Luminary Micro LM3S6965EVB (ARM Cortex-M3) -@item Freescale MCF5208EVB (ColdFire V2). -@item Arnewsh MCF5206 evaluation board (ColdFire V2). -@item Palm Tungsten|E PDA (OMAP310 processor) -@item N800 and N810 tablets (OMAP2420 processor) -@item MusicPal (MV88W8618 ARM processor) -@item Gumstix "Connex" and "Verdex" motherboards (PXA255/270). -@item Siemens SX1 smartphone (OMAP310 processor) -@item AXIS-Devboard88 (CRISv32 ETRAX-FS). -@item Petalogix Spartan 3aDSP1800 MMU ref design (MicroBlaze). -@item Avnet LX60/LX110/LX200 boards (Xtensa) -@end itemize +@item QEMU can run without a host kernel driver and yet gives acceptable +performance. It uses dynamic translation to native code for reasonable speed, +with support for self-modifying code and precise exceptions. -@cindex supported user mode targets -For user emulation, x86 (32 and 64 bit), PowerPC (32 and 64 bit), -ARM, MIPS (32 bit only), Sparc (32 and 64 bit), -Alpha, ColdFire(m68k), CRISv32 and MicroBlaze CPUs are supported. +@item It is portable to several operating systems (GNU/Linux, *BSD, Mac OS X, +Windows) and architectures. -@node Installation -@chapter Installation +@item It performs accurate software emulation of the FPU. +@end itemize -If you want to compile QEMU yourself, see @ref{compilation}. +QEMU user mode emulation has the following features: +@itemize +@item Generic Linux system call converter, including most ioctls. -@menu -* install_linux:: Linux -* install_windows:: Windows -* install_mac:: Macintosh -@end menu +@item clone() emulation using native CPU clone() to use Linux scheduler for threads. -@node install_linux -@section Linux -@cindex installation (Linux) +@item Accurate signal handling by remapping host signals to target signals. +@end itemize + +QEMU full system emulation has the following features: +@itemize +@item +QEMU uses a full software MMU for maximum portability. -If a precompiled package is available for your distribution - you just -have to install it. Otherwise, see @ref{compilation}. +@item +QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators +execute most of the guest code natively, while +continuing to emulate the rest of the machine. -@node install_windows -@section Windows -@cindex installation (Windows) +@item +Various hardware devices can be emulated and in some cases, host +devices (e.g. serial and parallel ports, USB, drives) can be used +transparently by the guest Operating System. Host device passthrough +can be used for talking to external physical peripherals (e.g. a +webcam, modem or tape drive). -Download the experimental binary installer at -@url{http://www.free.oszoo.org/@/download.html}. -TODO (no longer available) +@item +Symmetric multiprocessing (SMP) support. Currently, an in-kernel +accelerator is required to use more than one host CPU for emulation. -@node install_mac -@section Mac OS X +@end itemize -Download the experimental binary installer at -@url{http://www.free.oszoo.org/@/download.html}. -TODO (no longer available) @node QEMU PC System emulator @chapter QEMU PC System emulator @@ -2660,6 +2630,7 @@ so should only be used with trusted guest OS. @menu * Supported Operating Systems :: +* Features:: * Linux User space emulator:: * BSD User space emulator :: @end menu @@ -2676,6 +2647,39 @@ Linux (referred as qemu-linux-user) BSD (referred as qemu-bsd-user) @end itemize +@node Features +@section Features + +QEMU user space emulation has the following notable features: + +@table @strong +@item System call translation: +QEMU includes a generic system call translator. This means that +the parameters of the system calls can be converted to fix +endianness and 32/64-bit mismatches between hosts and targets. +IOCTLs can be converted too. + +@item POSIX signal handling: +QEMU can redirect to the running program all signals coming from +the host (such as @code{SIGALRM}), as well as synthesize signals from +virtual CPU exceptions (for example @code{SIGFPE} when the program +executes a division by zero). + +QEMU relies on the host kernel to emulate most signal system +calls, for example to emulate the signal mask. On Linux, QEMU +supports both normal and real-time signals. + +@item Threading: +On Linux, QEMU can emulate the @code{clone} syscall and create a real +host thread (with a separate virtual CPU) for each emulated thread. +Note that not all targets currently emulate atomic operations correctly. +x86 and ARM use a global lock in order to preserve their semantics. +@end table + +QEMU was conceived so that ultimately it can emulate itself. Although +it is not very useful, it is an important test to show the power of the +emulator. + @node Linux User space emulator @section Linux User space emulator @@ -2945,220 +2949,8 @@ Act as if the host page size was 'pagesize' bytes Run the emulation in single step mode. @end table -@node compilation -@chapter Compilation from the sources - -@menu -* Linux/Unix:: -* Windows:: -* Cross compilation for Windows with Linux:: -* Mac OS X:: -* Make targets:: -@end menu - -@node Linux/Unix -@section Linux/Unix - -@subsection Compilation - -First you must decompress the sources: -@example -cd /tmp -tar zxvf qemu-x.y.z.tar.gz -cd qemu-x.y.z -@end example - -Then you configure QEMU and build it (usually no options are needed): -@example -./configure -make -@end example - -Then type as root user: -@example -make install -@end example -to install QEMU in @file{/usr/local}. - -@node Windows -@section Windows - -@itemize -@item Install the current versions of MSYS and MinGW from -@url{http://www.mingw.org/}. You can find detailed installation -instructions in the download section and the FAQ. - -@item Download -the MinGW development library of SDL 1.2.x -(@file{SDL-devel-1.2.x-@/mingw32.tar.gz}) from -@url{http://www.libsdl.org}. Unpack it in a temporary place and -edit the @file{sdl-config} script so that it gives the -correct SDL directory when invoked. - -@item Install the MinGW version of zlib and make sure -@file{zlib.h} and @file{libz.dll.a} are in -MinGW's default header and linker search paths. - -@item Extract the current version of QEMU. - -@item Start the MSYS shell (file @file{msys.bat}). - -@item Change to the QEMU directory. Launch @file{./configure} and -@file{make}. If you have problems using SDL, verify that -@file{sdl-config} can be launched from the MSYS command line. - -@item You can install QEMU in @file{Program Files/QEMU} by typing -@file{make install}. Don't forget to copy @file{SDL.dll} in -@file{Program Files/QEMU}. - -@end itemize - -@node Cross compilation for Windows with Linux -@section Cross compilation for Windows with Linux - -@itemize -@item -Install the MinGW cross compilation tools available at -@url{http://www.mingw.org/}. - -@item Download -the MinGW development library of SDL 1.2.x -(@file{SDL-devel-1.2.x-@/mingw32.tar.gz}) from -@url{http://www.libsdl.org}. Unpack it in a temporary place and -edit the @file{sdl-config} script so that it gives the -correct SDL directory when invoked. Set up the @code{PATH} environment -variable so that @file{sdl-config} can be launched by -the QEMU configuration script. - -@item Install the MinGW version of zlib and make sure -@file{zlib.h} and @file{libz.dll.a} are in -MinGW's default header and linker search paths. - -@item -Configure QEMU for Windows cross compilation: -@example -PATH=/usr/i686-pc-mingw32/sys-root/mingw/bin:$PATH ./configure --cross-prefix='i686-pc-mingw32-' -@end example -The example assumes @file{sdl-config} is installed under @file{/usr/i686-pc-mingw32/sys-root/mingw/bin} and -MinGW cross compilation tools have names like @file{i686-pc-mingw32-gcc} and @file{i686-pc-mingw32-strip}. -We set the @code{PATH} environment variable to ensure the MinGW version of @file{sdl-config} is used and -use --cross-prefix to specify the name of the cross compiler. -You can also use --prefix to set the Win32 install path which defaults to @file{c:/Program Files/QEMU}. - -Under Fedora Linux, you can run: -@example -yum -y install mingw32-gcc mingw32-SDL mingw32-zlib -@end example -to get a suitable cross compilation environment. - -@item You can install QEMU in the installation directory by typing -@code{make install}. Don't forget to copy @file{SDL.dll} and @file{zlib1.dll} into the -installation directory. - -@end itemize - -Wine can be used to launch the resulting qemu-system-i386.exe -and all other qemu-system-@var{target}.exe compiled for Win32. - -@node Mac OS X -@section Mac OS X - -System Requirements: -@itemize -@item Mac OS 10.5 or higher -@item The clang compiler shipped with Xcode 4.2 or higher, -or GCC 4.3 or higher -@end itemize - -Additional Requirements (install in order): -@enumerate -@item libffi: @uref{https://sourceware.org/libffi/} -@item gettext: @uref{http://www.gnu.org/software/gettext/} -@item glib: @uref{http://ftp.gnome.org/pub/GNOME/sources/glib/} -@item pkg-config: @uref{http://www.freedesktop.org/wiki/Software/pkg-config/} -@item autoconf: @uref{http://www.gnu.org/software/autoconf/autoconf.html} -@item automake: @uref{http://www.gnu.org/software/automake/} -@item pixman: @uref{http://www.pixman.org/} -@end enumerate - -* You may find it easiest to get these from a third-party packager -such as Homebrew, Macports, or Fink. - -After downloading the QEMU source code, double-click it to expand it. - -Then configure and make QEMU: -@example -./configure -make -@end example - -If you have a recent version of Mac OS X (OSX 10.7 or better -with Xcode 4.2 or better) we recommend building QEMU with the -default compiler provided by Apple, for your version of Mac OS X -(which will be 'clang'). The configure script will -automatically pick this. -Note: If after the configure step you see a message like this: -@example -ERROR: Your compiler does not support the __thread specifier for - Thread-Local Storage (TLS). Please upgrade to a version that does. -@end example -you may have to build your own version of gcc from source. Expect that to take -several hours. More information can be found here: -@uref{https://gcc.gnu.org/install/} @* - -These are some of the third party binaries of gcc available for download: -@itemize -@item Homebrew: @uref{http://brew.sh/} -@item @uref{https://www.litebeam.net/gcc/gcc_472.pkg} -@item @uref{http://www.macports.org/ports.php?by=name&substr=gcc} -@end itemize - -You can have several versions of GCC on your system. To specify a certain version, -use the --cc and --cxx options. -@example -./configure --cxx=<path of your c++ compiler> --cc=<path of your c compiler> <other options> -@end example - -@node Make targets -@section Make targets - -@table @code - -@item make -@item make all -Make everything which is typically needed. - -@item install -TODO - -@item install-doc -TODO - -@item make clean -Remove most files which were built during make. - -@item make distclean -Remove everything which was built during make. - -@item make dvi -@item make html -@item make info -@item make pdf -Create documentation in dvi, html, info or pdf format. - -@item make cscope -TODO - -@item make defconfig -(Re-)create some build configuration files. -User made changes will be overwritten. - -@item tar -@item tarbin -TODO - -@end table +@include qemu-tech.texi @node License @appendix License diff --git a/qemu-nbd.c b/qemu-nbd.c index 99297a556f..705b95ec29 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -901,6 +901,14 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + if (dev_offset >= fd_size) { + error_report("Offset (%lld) has to be smaller than the image size " + "(%lld)", + (long long int)dev_offset, (long long int)fd_size); + exit(EXIT_FAILURE); + } + fd_size -= dev_offset; + if (partition != -1) { ret = find_partition(blk, partition, &dev_offset, &fd_size); if (ret < 0) { diff --git a/qemu-tech.texi b/qemu-tech.texi index 1b048cb337..52a56ae25e 100644 --- a/qemu-tech.texi +++ b/qemu-tech.texi @@ -1,146 +1,27 @@ -\input texinfo @c -*- texinfo -*- -@c %**start of header -@setfilename qemu-tech.info - -@documentlanguage en -@documentencoding UTF-8 - -@settitle QEMU Internals -@exampleindent 0 -@paragraphindent 0 -@c %**end of header - -@ifinfo -@direntry -* QEMU Internals: (qemu-tech). The QEMU Emulator Internals. -@end direntry -@end ifinfo - -@iftex -@titlepage -@sp 7 -@center @titlefont{QEMU Internals} -@sp 3 -@end titlepage -@end iftex - -@ifnottex -@node Top -@top +@node Implementation notes +@appendix Implementation notes @menu -* Introduction:: -* QEMU Internals:: -* Regression Tests:: -* Index:: +* CPU emulation:: +* Translator Internals:: +* QEMU compared to other emulators:: +* Bibliography:: @end menu -@end ifnottex - -@contents -@node Introduction -@chapter Introduction +@node CPU emulation +@section CPU emulation @menu -* intro_features:: Features -* intro_x86_emulation:: x86 and x86-64 emulation -* intro_arm_emulation:: ARM emulation -* intro_mips_emulation:: MIPS emulation -* intro_ppc_emulation:: PowerPC emulation -* intro_sparc_emulation:: Sparc32 and Sparc64 emulation -* intro_xtensa_emulation:: Xtensa emulation -* intro_other_emulation:: Other CPU emulation +* x86:: x86 and x86-64 emulation +* ARM:: ARM emulation +* MIPS:: MIPS emulation +* PPC:: PowerPC emulation +* SPARC:: Sparc32 and Sparc64 emulation +* Xtensa:: Xtensa emulation @end menu -@node intro_features -@section Features - -QEMU is a FAST! processor emulator using a portable dynamic -translator. - -QEMU has two operating modes: - -@itemize @minus - -@item -Full system emulation. In this mode (full platform virtualization), -QEMU emulates a full system (usually a PC), including a processor and -various peripherals. It can be used to launch several different -Operating Systems at once without rebooting the host machine or to -debug system code. - -@item -User mode emulation. In this mode (application level virtualization), -QEMU can launch processes compiled for one CPU on another CPU, however -the Operating Systems must match. This can be used for example to ease -cross-compilation and cross-debugging. -@end itemize - -As QEMU requires no host kernel driver to run, it is very safe and -easy to use. - -QEMU generic features: - -@itemize - -@item User space only or full system emulation. - -@item Using dynamic translation to native code for reasonable speed. - -@item -Working on x86, x86_64 and PowerPC32/64 hosts. Being tested on ARM, -S390x, Sparc32 and Sparc64. - -@item Self-modifying code support. - -@item Precise exceptions support. - -@item -Floating point library supporting both full software emulation and -native host FPU instructions. - -@end itemize - -QEMU user mode emulation features: -@itemize -@item Generic Linux system call converter, including most ioctls. - -@item clone() emulation using native CPU clone() to use Linux scheduler for threads. - -@item Accurate signal handling by remapping host signals to target signals. -@end itemize - -Linux user emulator (Linux host only) can be used to launch the Wine -Windows API emulator (@url{http://www.winehq.org}). A BSD user emulator for BSD -hosts is under development. It would also be possible to develop a -similar user emulator for Solaris. - -QEMU full system emulation features: -@itemize -@item -QEMU uses a full software MMU for maximum portability. - -@item -QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators -execute some of the guest code natively, while -continuing to emulate the rest of the machine. - -@item -Various hardware devices can be emulated and in some cases, host -devices (e.g. serial and parallel ports, USB, drives) can be used -transparently by the guest Operating System. Host device passthrough -can be used for talking to external physical peripherals (e.g. a -webcam, modem or tape drive). - -@item -Symmetric multiprocessing (SMP) even on a host with a single CPU. On a -SMP host system, QEMU can use only one CPU fully due to difficulty in -implementing atomic memory accesses efficiently. - -@end itemize - -@node intro_x86_emulation -@section x86 and x86-64 emulation +@node x86 +@subsection x86 and x86-64 emulation QEMU x86 target features: @@ -174,8 +55,8 @@ normal use. @end itemize -@node intro_arm_emulation -@section ARM emulation +@node ARM +@subsection ARM emulation @itemize @@ -187,8 +68,8 @@ normal use. @end itemize -@node intro_mips_emulation -@section MIPS emulation +@node MIPS +@subsection MIPS emulation @itemize @@ -214,8 +95,8 @@ Current QEMU limitations: @end itemize -@node intro_ppc_emulation -@section PowerPC emulation +@node PPC +@subsection PowerPC emulation @itemize @@ -226,8 +107,8 @@ FPU and MMU. @end itemize -@node intro_sparc_emulation -@section Sparc32 and Sparc64 emulation +@node SPARC +@subsection Sparc32 and Sparc64 emulation @itemize @@ -254,8 +135,8 @@ Current QEMU limitations: @end itemize -@node intro_xtensa_emulation -@section Xtensa emulation +@node Xtensa +@subsection Xtensa emulation @itemize @@ -279,96 +160,8 @@ may be created from overlay with minimal amount of hand-written code. @end itemize -@node intro_other_emulation -@section Other CPU emulation - -In addition to the above, QEMU supports emulation of other CPUs with -varying levels of success. These are: - -@itemize - -@item -Alpha -@item -CRIS -@item -M68k -@item -SH4 -@end itemize - -@node QEMU Internals -@chapter QEMU Internals - -@menu -* QEMU compared to other emulators:: -* Portable dynamic translation:: -* Condition code optimisations:: -* CPU state optimisations:: -* Translation cache:: -* Direct block chaining:: -* Self-modifying code and translated code invalidation:: -* Exception support:: -* MMU emulation:: -* Device emulation:: -* Hardware interrupts:: -* User emulation specific details:: -* Bibliography:: -@end menu - -@node QEMU compared to other emulators -@section QEMU compared to other emulators - -Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than -bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC -emulation while QEMU can emulate several processors. - -Like Valgrind [2], QEMU does user space emulation and dynamic -translation. Valgrind is mainly a memory debugger while QEMU has no -support for it (QEMU could be used to detect out of bound memory -accesses as Valgrind, but it has no support to track uninitialised data -as Valgrind does). The Valgrind dynamic translator generates better code -than QEMU (in particular it does register allocation) but it is closely -tied to an x86 host and target and has no support for precise exceptions -and system emulation. - -EM86 [3] is the closest project to user space QEMU (and QEMU still uses -some of its code, in particular the ELF file loader). EM86 was limited -to an alpha host and used a proprietary and slow interpreter (the -interpreter part of the FX!32 Digital Win32 code translator [4]). - -TWIN from Willows Software was a Windows API emulator like Wine. It is less -accurate than Wine but includes a protected mode x86 interpreter to launch -x86 Windows executables. Such an approach has greater potential because most -of the Windows API is executed natively but it is far more difficult to -develop because all the data structures and function parameters exchanged -between the API and the x86 code must be converted. - -User mode Linux [5] was the only solution before QEMU to launch a -Linux kernel as a process while not needing any host kernel -patches. However, user mode Linux requires heavy kernel patches while -QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is -slower. - -The Plex86 [6] PC virtualizer is done in the same spirit as the now -obsolete qemu-fast system emulator. It requires a patched Linux kernel -to work (you cannot launch the same kernel on your PC), but the -patches are really small. As it is a PC virtualizer (no emulation is -done except for some privileged instructions), it has the potential of -being faster than QEMU. The downside is that a complicated (and -potentially unsafe) host kernel patch is needed. - -The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster -than QEMU (without virtualization), but they all need specific, proprietary -and potentially unsafe host drivers. Moreover, they are unable to -provide cycle exact simulation as an emulator can. - -VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC -[12] uses QEMU to simulate a system where some hardware devices are -developed in SystemC. - -@node Portable dynamic translation -@section Portable dynamic translation +@node Translator Internals +@section Translator Internals QEMU is a dynamic translator. When it first encounters a piece of code, it converts it to the host instruction set. Usually dynamic translators @@ -376,68 +169,26 @@ are very complicated and highly CPU dependent. QEMU uses some tricks which make it relatively easily portable and simple while achieving good performances. -After the release of version 0.9.1, QEMU switched to a new method of -generating code, Tiny Code Generator or TCG. TCG relaxes the -dependency on the exact version of the compiler used. The basic idea -is to split every target instruction into a couple of RISC-like TCG -ops (see @code{target-i386/translate.c}). Some optimizations can be -performed at this stage, including liveness analysis and trivial -constant expression evaluation. TCG ops are then implemented in the -host CPU back end, also known as TCG target (see -@code{tcg/i386/tcg-target.inc.c}). For more information, please take a -look at @code{tcg/README}. - -@node Condition code optimisations -@section Condition code optimisations - -Lazy evaluation of CPU condition codes (@code{EFLAGS} register on x86) -is important for CPUs where every instruction sets the condition -codes. It tends to be less important on conventional RISC systems -where condition codes are only updated when explicitly requested. On -Sparc64, costly update of both 32 and 64 bit condition codes can be -avoided with lazy evaluation. - -Instead of computing the condition codes after each x86 instruction, -QEMU just stores one operand (called @code{CC_SRC}), the result -(called @code{CC_DST}) and the type of operation (called -@code{CC_OP}). When the condition codes are needed, the condition -codes can be calculated using this information. In addition, an -optimized calculation can be performed for some instruction types like -conditional branches. - -@code{CC_OP} is almost never explicitly set in the generated code -because it is known at translation time. - -The lazy condition code evaluation is used on x86, m68k, cris and -Sparc. ARM uses a simplified variant for the N and Z flags. - -@node CPU state optimisations -@section CPU state optimisations +QEMU's dynamic translation backend is called TCG, for "Tiny Code +Generator". For more information, please take a look at @code{tcg/README}. + +Some notable features of QEMU's dynamic translator are: + +@table @strong +@item CPU state optimisations: The target CPUs have many internal states which change the way it evaluates instructions. In order to achieve a good speed, the translation phase considers that some state information of the virtual CPU cannot change in it. The state is recorded in the Translation Block (TB). If the state changes (e.g. privilege level), a new TB will be generated and the previous TB won't be used anymore until the state -matches the state recorded in the previous TB. For example, if the SS, +matches the state recorded in the previous TB. The same idea can be applied +to other aspects of the CPU state. For example, on x86, if the SS, DS and ES segments have a zero base, then the translator does not even generate an addition for the segment base. -[The FPU stack pointer register is not handled that way yet]. - -@node Translation cache -@section Translation cache - -A 32 MByte cache holds the most recently used translations. For -simplicity, it is completely flushed when it is full. A translation unit -contains just a single basic block (a block of x86 instructions -terminated by a jump or by a virtual CPU state change which the -translator cannot deduce statically). - -@node Direct block chaining -@section Direct block chaining - +@item Direct block chaining: After each translated basic block is executed, QEMU uses the simulated Program Counter (PC) and other cpu state information (such as the CS segment base value) to find the next basic block. @@ -451,18 +202,17 @@ it easier to make the jump target modification atomic. On some host architectures (such as x86 or PowerPC), the @code{JUMP} opcode is directly patched so that the block chaining has no overhead. -@node Self-modifying code and translated code invalidation -@section Self-modifying code and translated code invalidation - +@item Self-modifying code and translated code invalidation: Self-modifying code is a special challenge in x86 emulation because no instruction cache invalidation is signaled by the application when code is modified. -When translated code is generated for a basic block, the corresponding -host page is write protected if it is not already read-only. Then, if -a write access is done to the page, Linux raises a SEGV signal. QEMU -then invalidates all the translated code in the page and enables write -accesses to the page. +User-mode emulation marks a host page as write-protected (if it is +not already read-only) every time translated code is generated for a +basic block. Then, if a write access is done to the page, Linux raises +a SEGV signal. QEMU then invalidates all the translated code in the page +and enables write accesses to the page. For system emulation, write +protection is achieved through the software MMU. Correct translated code invalidation is done efficiently by maintaining a linked list of every translated block contained in a given page. Other @@ -474,132 +224,95 @@ necessary. However, QEMU still requires that the generated code always matches the target instructions in memory in order to handle exceptions correctly. -@node Exception support -@section Exception support - +@item Exception support: longjmp() is used when an exception such as division by zero is encountered. The host SIGSEGV and SIGBUS signal handlers are used to get invalid -memory accesses. The simulated program counter is found by -retranslating the corresponding basic block and by looking where the -host program counter was at the exception point. - -The virtual CPU cannot retrieve the exact @code{EFLAGS} register because -in some cases it is not computed because of condition code -optimisations. It is not a big concern because the emulated code can -still be restarted in any cases. - -@node MMU emulation -@section MMU emulation - -For system emulation QEMU supports a soft MMU. In that mode, the MMU +memory accesses. QEMU keeps a map from host program counter to +target program counter, and looks up where the exception happened +based on the host program counter at the exception point. + +On some targets, some bits of the virtual CPU's state are not flushed to the +memory until the end of the translation block. This is done for internal +emulation state that is rarely accessed directly by the program and/or changes +very often throughout the execution of a translation block---this includes +condition codes on x86, delay slots on SPARC, conditional execution on +ARM, and so on. This state is stored for each target instruction, and +looked up on exceptions. + +@item MMU emulation: +For system emulation QEMU uses a software MMU. In that mode, the MMU virtual to physical address translation is done at every memory -access. QEMU uses an address translation cache to speed up the -translation. +access. +QEMU uses an address translation cache (TLB) to speed up the translation. In order to avoid flushing the translated code each time the MMU -mappings change, QEMU uses a physically indexed translation cache. It +mappings change, all caches in QEMU are physically indexed. This means that each basic block is indexed with its physical address. -When MMU mappings change, only the chaining of the basic blocks is -reset (i.e. a basic block can no longer jump directly to another one). - -@node Device emulation -@section Device emulation - -Systems emulated by QEMU are organized by boards. At initialization -phase, each board instantiates a number of CPUs, devices, RAM and -ROM. Each device in turn can assign I/O ports or memory areas (for -MMIO) to its handlers. When the emulation starts, an access to the -ports or MMIO memory areas assigned to the device causes the -corresponding handler to be called. - -RAM and ROM are handled more optimally, only the offset to the host -memory needs to be added to the guest address. - -The video RAM of VGA and other display cards is special: it can be -read or written directly like RAM, but write accesses cause the memory -to be marked with VGA_DIRTY flag as well. +In order to avoid invalidating the basic block chain when MMU mappings +change, chaining is only performed when the destination of the jump +shares a page with the basic block that is performing the jump. -QEMU supports some device classes like serial and parallel ports, USB, -drives and network devices, by providing APIs for easier connection to -the generic, higher level implementations. The API hides the -implementation details from the devices, like native device use or -advanced block device formats like QCOW. - -Usually the devices implement a reset method and register support for -saving and loading of the device state. The devices can also use -timers, especially together with the use of bottom halves (BHs). - -@node Hardware interrupts -@section Hardware interrupts - -In order to be faster, QEMU does not check at every basic block if a -hardware interrupt is pending. Instead, the user must asynchronously -call a specific function to tell that an interrupt is pending. This -function resets the chaining of the currently executing basic -block. It ensures that the execution will return soon in the main loop -of the CPU emulator. Then the main loop can test if the interrupt is -pending and handle it. - -@node User emulation specific details -@section User emulation specific details - -@subsection Linux system call translation - -QEMU includes a generic system call translator for Linux. It means that -the parameters of the system calls can be converted to fix the -endianness and 32/64 bit issues. The IOCTLs are converted with a generic -type description system (see @file{ioctls.h} and @file{thunk.c}). - -QEMU supports host CPUs which have pages bigger than 4KB. It records all -the mappings the process does and try to emulated the @code{mmap()} -system calls in cases where the host @code{mmap()} call would fail -because of bad page alignment. - -@subsection Linux signals - -Normal and real-time signals are queued along with their information -(@code{siginfo_t}) as it is done in the Linux kernel. Then an interrupt -request is done to the virtual CPU. When it is interrupted, one queued -signal is handled by generating a stack frame in the virtual CPU as the -Linux kernel does. The @code{sigreturn()} system call is emulated to return -from the virtual signal handler. +The MMU can also distinguish RAM and ROM memory areas from MMIO memory +areas. Access is faster for RAM and ROM because the translation cache also +hosts the offset between guest address and host memory. Accessing MMIO +memory areas instead calls out to C code for device emulation. +Finally, the MMU helps tracking dirty pages and pages pointed to by +translation blocks. +@end table -Some signals (such as SIGALRM) directly come from the host. Other -signals are synthesized from the virtual CPU exceptions such as SIGFPE -when a division by zero is done (see @code{main.c:cpu_loop()}). +@node QEMU compared to other emulators +@section QEMU compared to other emulators -The blocked signal mask is still handled by the host Linux kernel so -that most signal system calls can be redirected directly to the host -Linux kernel. Only the @code{sigaction()} and @code{sigreturn()} system -calls need to be fully emulated (see @file{signal.c}). +Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than +bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC +emulation while QEMU can emulate several processors. -@subsection clone() system call and threads +Like Valgrind [2], QEMU does user space emulation and dynamic +translation. Valgrind is mainly a memory debugger while QEMU has no +support for it (QEMU could be used to detect out of bound memory +accesses as Valgrind, but it has no support to track uninitialised data +as Valgrind does). The Valgrind dynamic translator generates better code +than QEMU (in particular it does register allocation) but it is closely +tied to an x86 host and target and has no support for precise exceptions +and system emulation. -The Linux clone() system call is usually used to create a thread. QEMU -uses the host clone() system call so that real host threads are created -for each emulated thread. One virtual CPU instance is created for each -thread. +EM86 [3] is the closest project to user space QEMU (and QEMU still uses +some of its code, in particular the ELF file loader). EM86 was limited +to an alpha host and used a proprietary and slow interpreter (the +interpreter part of the FX!32 Digital Win32 code translator [4]). -The virtual x86 CPU atomic operations are emulated with a global lock so -that their semantic is preserved. +TWIN from Willows Software was a Windows API emulator like Wine. It is less +accurate than Wine but includes a protected mode x86 interpreter to launch +x86 Windows executables. Such an approach has greater potential because most +of the Windows API is executed natively but it is far more difficult to +develop because all the data structures and function parameters exchanged +between the API and the x86 code must be converted. -Note that currently there are still some locking issues in QEMU. In -particular, the translated cache flush is not protected yet against -reentrancy. +User mode Linux [5] was the only solution before QEMU to launch a +Linux kernel as a process while not needing any host kernel +patches. However, user mode Linux requires heavy kernel patches while +QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is +slower. -@subsection Self-virtualization +The Plex86 [6] PC virtualizer is done in the same spirit as the now +obsolete qemu-fast system emulator. It requires a patched Linux kernel +to work (you cannot launch the same kernel on your PC), but the +patches are really small. As it is a PC virtualizer (no emulation is +done except for some privileged instructions), it has the potential of +being faster than QEMU. The downside is that a complicated (and +potentially unsafe) host kernel patch is needed. -QEMU was conceived so that ultimately it can emulate itself. Although -it is not very useful, it is an important test to show the power of the -emulator. +The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster +than QEMU (without virtualization), but they all need specific, proprietary +and potentially unsafe host drivers. Moreover, they are unable to +provide cycle exact simulation as an emulator can. -Achieving self-virtualization is not easy because there may be address -space conflicts. QEMU user emulators solve this problem by being an -executable ELF shared object as the ld-linux.so ELF interpreter. That -way, it can be relocated at load time. +VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC +[12] uses QEMU to simulate a system where some hardware devices are +developed in SystemC. @node Bibliography @section Bibliography @@ -656,43 +369,3 @@ Kernel Based Virtual Machine (KVM). QEMU-SystemC, a hardware co-simulator. @end table - -@node Regression Tests -@chapter Regression Tests - -In the directory @file{tests/}, various interesting testing programs -are available. They are used for regression testing. - -@menu -* test-i386:: -* linux-test:: -@end menu - -@node test-i386 -@section @file{test-i386} - -This program executes most of the 16 bit and 32 bit x86 instructions and -generates a text output. It can be compared with the output obtained with -a real CPU or another emulator. The target @code{make test} runs this -program and a @code{diff} on the generated output. - -The Linux system call @code{modify_ldt()} is used to create x86 selectors -to test some 16 bit addressing and 32 bit with segmentation cases. - -The Linux system call @code{vm86()} is used to test vm86 emulation. - -Various exceptions are raised to test most of the x86 user space -exception reporting. - -@node linux-test -@section @file{linux-test} - -This program tests various Linux system calls. It is used to verify -that the system call parameters are correctly converted between target -and host CPUs. - -@node Index -@chapter Index -@printindex cp - -@bye @@ -171,10 +171,8 @@ SectionEnd Section "Documentation" SectionDoc SetOutPath "$INSTDIR" File "${BINDIR}\qemu-doc.html" - File "${BINDIR}\qemu-tech.html" CreateDirectory "$SMPROGRAMS\${PRODUCT}" CreateShortCut "$SMPROGRAMS\${PRODUCT}\User Documentation.lnk" "$INSTDIR\qemu-doc.html" "" "$INSTDIR\qemu-doc.html" 0 - CreateShortCut "$SMPROGRAMS\${PRODUCT}\Technical Documentation.lnk" "$INSTDIR\qemu-tech.html" "" "$INSTDIR\qemu-tech.html" 0 SectionEnd !endif @@ -219,7 +217,6 @@ Section "Uninstall" Delete "$INSTDIR\qemu.exe" Delete "$INSTDIR\qemu-system-*.exe" Delete "$INSTDIR\qemu-doc.html" - Delete "$INSTDIR\qemu-tech.html" RMDir /r "$INSTDIR\keymaps" RMDir /r "$INSTDIR\share" ; Remove generated files diff --git a/qga/commands.c b/qga/commands.c index 50fd26a817..edd3e830e6 100644 --- a/qga/commands.c +++ b/qga/commands.c @@ -16,6 +16,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/base64.h" #include "qemu/cutils.h" +#include "qemu/atomic.h" /* Maximum captured guest-exec out_data/err_data - 16MB */ #define GUEST_EXEC_MAX_OUTPUT (16*1024*1024) @@ -82,7 +83,7 @@ struct GuestExecIOData { guchar *data; gsize size; gsize length; - gint closed; + bool closed; bool truncated; const char *name; }; @@ -93,7 +94,7 @@ struct GuestExecInfo { int64_t pid_numeric; gint status; bool has_output; - gint finished; + bool finished; GuestExecIOData in; GuestExecIOData out; GuestExecIOData err; @@ -156,13 +157,13 @@ GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) ges = g_new0(GuestExecStatus, 1); - bool finished = g_atomic_int_get(&gei->finished); + bool finished = atomic_mb_read(&gei->finished); /* need to wait till output channels are closed * to be sure we captured all output at this point */ if (gei->has_output) { - finished = finished && g_atomic_int_get(&gei->out.closed); - finished = finished && g_atomic_int_get(&gei->err.closed); + finished = finished && atomic_mb_read(&gei->out.closed); + finished = finished && atomic_mb_read(&gei->err.closed); } ges->exited = finished; @@ -264,7 +265,7 @@ static void guest_exec_child_watch(GPid pid, gint status, gpointer data) (int32_t)gpid_to_int64(pid), (uint32_t)status); gei->status = status; - gei->finished = true; + atomic_mb_set(&gei->finished, true); g_spawn_close_pid(pid); } @@ -320,7 +321,7 @@ static gboolean guest_exec_input_watch(GIOChannel *ch, done: g_io_channel_shutdown(ch, true, NULL); g_io_channel_unref(ch); - g_atomic_int_set(&p->closed, 1); + atomic_mb_set(&p->closed, true); g_free(p->data); return false; @@ -374,7 +375,7 @@ static gboolean guest_exec_output_watch(GIOChannel *ch, close: g_io_channel_shutdown(ch, true, NULL); g_io_channel_unref(ch); - g_atomic_int_set(&p->closed, 1); + atomic_mb_set(&p->closed, true); return false; } @@ -120,10 +120,10 @@ void cpu_reset_interrupt(CPUState *cpu, int mask) void cpu_exit(CPUState *cpu) { - cpu->exit_request = 1; + atomic_set(&cpu->exit_request, 1); /* Ensure cpu_exec will see the exit request after TCG has exited. */ smp_wmb(); - cpu->tcg_exit_req = 1; + atomic_set(&cpu->tcg_exit_req, 1); } int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, @@ -253,6 +253,7 @@ void cpu_reset(CPUState *cpu) static void cpu_common_reset(CPUState *cpu) { CPUClass *cc = CPU_GET_CLASS(cpu); + int i; if (qemu_loglevel_mask(CPU_LOG_RESET)) { qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); @@ -268,7 +269,10 @@ static void cpu_common_reset(CPUState *cpu) cpu->can_do_io = 1; cpu->exception_index = -1; cpu->crash_occurred = false; - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); + + for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { + atomic_set(&cpu->tb_jmp_cache[i], NULL); + } } static bool cpu_common_has_work(CPUState *cs) diff --git a/qom/object.c b/qom/object.c index 8166b7dace..7a05e35ed9 100644 --- a/qom/object.c +++ b/qom/object.c @@ -614,7 +614,7 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename, Object *inst; for (i = 0; obj && i < OBJECT_CLASS_CAST_CACHE; i++) { - if (obj->class->object_cast_cache[i] == typename) { + if (atomic_read(&obj->class->object_cast_cache[i]) == typename) { goto out; } } @@ -631,10 +631,10 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename, if (obj && obj == inst) { for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) { - obj->class->object_cast_cache[i - 1] = - obj->class->object_cast_cache[i]; + atomic_set(&obj->class->object_cast_cache[i - 1], + atomic_read(&obj->class->object_cast_cache[i])); } - obj->class->object_cast_cache[i - 1] = typename; + atomic_set(&obj->class->object_cast_cache[i - 1], typename); } out: @@ -704,7 +704,7 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class, int i; for (i = 0; class && i < OBJECT_CLASS_CAST_CACHE; i++) { - if (class->class_cast_cache[i] == typename) { + if (atomic_read(&class->class_cast_cache[i]) == typename) { ret = class; goto out; } @@ -725,9 +725,10 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class, #ifdef CONFIG_QOM_CAST_DEBUG if (class && ret == class) { for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) { - class->class_cast_cache[i - 1] = class->class_cast_cache[i]; + atomic_set(&class->class_cast_cache[i - 1], + atomic_read(&class->class_cast_cache[i])); } - class->class_cast_cache[i - 1] = typename; + atomic_set(&class->class_cast_cache[i - 1], typename); } out: #endif diff --git a/target-cris/cpu.h b/target-cris/cpu.h index 7d7fe6eb1c..43d5f9d1da 100644 --- a/target-cris/cpu.h +++ b/target-cris/cpu.h @@ -223,6 +223,13 @@ int cpu_cris_signal_handler(int host_signum, void *pinfo, void cris_initialize_tcg(void); void cris_initialize_crisv10_tcg(void); +/* Instead of computing the condition codes after each CRIS instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DEST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. + */ enum { CC_OP_DYNAMIC, /* Use env->cc_op */ CC_OP_FLAGS, diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 1cb32ae456..e64569854f 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -698,6 +698,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; /* Use a clearer name for this. */ #define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET +/* Instead of computing the condition codes after each x86 instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. + */ typedef enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h index c2d40cb1cc..471f490dc1 100644 --- a/target-m68k/cpu.h +++ b/target-m68k/cpu.h @@ -154,6 +154,14 @@ int cpu_m68k_signal_handler(int host_signum, void *pinfo, void *puc); void cpu_m68k_flush_flags(CPUM68KState *, int); + +/* Instead of computing the condition codes after each m68k instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DEST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. + */ enum { CC_OP_DYNAMIC, /* Use env->cc_op */ CC_OP_FLAGS, /* CC_DEST = CVZN, CC_SRC = unused */ diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index 4fb34b598d..4e58cdee3e 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -671,6 +671,13 @@ ObjectClass *s390_cpu_class_by_name(const char *name); /* CC optimization */ +/* Instead of computing the condition codes after each x86 instruction, + * QEMU just stores the result (called CC_DST), the type of operation + * (called CC_OP) and whatever operands are needed (CC_SRC and possibly + * CC_VR). When the condition codes are needed, the condition codes can + * be calculated using this information. Condition codes are not generated + * if they are only needed for conditional branches. + */ enum cc_op { CC_OP_CONST0 = 0, /* CC is 0 */ CC_OP_CONST1, /* CC is 1 */ diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index a3d64a4e52..646a103513 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -102,6 +102,11 @@ #define CC_DST (env->cc_dst) #define CC_OP (env->cc_op) +/* Even though lazy evaluation of CPU condition codes tends to be less + * important on RISC systems where condition codes are only updated + * when explicitly requested, SPARC uses it to update 32-bit and 64-bit + * condition codes. + */ enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_FLAGS, /* all cc are back in status register */ diff --git a/tcg/README b/tcg/README index 1d48aa963f..ae31388c59 100644 --- a/tcg/README +++ b/tcg/README @@ -8,6 +8,11 @@ in the QOP code generator written by Paul Brook. 2) Definitions +TCG receives RISC-like "TCG ops" and performs some optimizations on them, +including liveness analysis and trivial constant expression +evaluation. TCG ops are then implemented in the host CPU back end, +also known as the TCG "target". + The TCG "target" is the architecture for which we generate the code. It is of course not the same as the "target" of QEMU which is the emulated architecture. As TCG started as a generic C backend used diff --git a/tcg/optimize.c b/tcg/optimize.c index 9998ac7413..0f1349086b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -468,9 +468,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, default: return 2; } - } else { - return 2; } + return 2; } /* Return 2 if the condition can't be simplified, and the result diff --git a/tests/tcg/README b/tests/tcg/README new file mode 100644 index 0000000000..5dcfb4852b --- /dev/null +++ b/tests/tcg/README @@ -0,0 +1,76 @@ +This directory contains various interesting programs for +regression testing. + +The target "make test" runs the programs and, if applicable, +runs "diff" to detect mismatches between output on the host and +output on QEMU. + +i386 +==== + +test-i386 +--------- + +This program executes most of the 16 bit and 32 bit x86 instructions and +generates a text output, for comparison with the output obtained with +a real CPU or another emulator. + +The Linux system call modify_ldt() is used to create x86 selectors +to test some 16 bit addressing and 32 bit with segmentation cases. + +The Linux system call vm86() is used to test vm86 emulation. + +Various exceptions are raised to test most of the x86 user space +exception reporting. + +linux-test +---------- + +This program tests various Linux system calls. It is used to verify +that the system call parameters are correctly converted between target +and host CPUs. + +test-i386-fprem +--------------- + +runcom +------ + +test-mmap +--------- + +sha1 +---- + +hello-i386 +---------- + + +ARM +=== + +hello-arm +--------- + +test-arm-iwmmxt +--------------- + +MIPS +==== + +hello-mips +---------- + +hello-mipsel +------------ + +CRIS +==== +The testsuite for CRIS is in tests/tcg/cris. You can run it +with "make test-cris". + +LM32 +==== +The testsuite for LM32 is in tests/tcg/cris. You can run it +with "make test-lm32". + diff --git a/tests/test-qht.c b/tests/test-qht.c index 46a64b6731..9b7423abb6 100644 --- a/tests/test-qht.c +++ b/tests/test-qht.c @@ -6,6 +6,7 @@ */ #include "qemu/osdep.h" #include "qemu/qht.h" +#include "qemu/rcu.h" #define N 5000 @@ -51,6 +52,7 @@ static void check(int a, int b, bool expected) struct qht_stats stats; int i; + rcu_read_lock(); for (i = a; i < b; i++) { void *p; uint32_t hash; @@ -61,6 +63,8 @@ static void check(int a, int b, bool expected) p = qht_lookup(&ht, is_equal, &val, hash); g_assert_true(!!p == expected); } + rcu_read_unlock(); + qht_statistics_init(&ht, &stats); if (stats.used_head_buckets) { g_assert_cmpfloat(qdist_avg(&stats.chain), >=, 1.0); diff --git a/ui/cocoa.m b/ui/cocoa.m index ba0e98a297..26d4a1c07f 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -814,7 +814,6 @@ QemuCocoaView *cocoaView; - (void)doToggleFullScreen:(id)sender; - (void)toggleFullScreen:(id)sender; - (void)showQEMUDoc:(id)sender; -- (void)showQEMUTec:(id)sender; - (void)zoomToFit:(id) sender; - (void)displayConsole:(id)sender; - (void)pauseQEMU:(id)sender; @@ -998,13 +997,6 @@ QemuCocoaView *cocoaView; [self openDocumentation: @"qemu-doc.html"]; } -- (void)showQEMUTec:(id)sender -{ - COCOA_DEBUG("QemuCocoaAppController: showQEMUTec\n"); - - [self openDocumentation: @"qemu-tech.html"]; -} - /* Stretches video to fit host monitor size */ - (void)zoomToFit:(id) sender { @@ -1335,7 +1327,6 @@ int main (int argc, const char * argv[]) { // Help menu menu = [[NSMenu alloc] initWithTitle:@"Help"]; [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"QEMU Documentation" action:@selector(showQEMUDoc:) keyEquivalent:@"?"] autorelease]]; // QEMU Help - [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"QEMU Technology" action:@selector(showQEMUTec:) keyEquivalent:@""] autorelease]]; // QEMU Help menuItem = [[[NSMenuItem alloc] initWithTitle:@"Window" action:nil keyEquivalent:@""] autorelease]; [menuItem setSubmenu:menu]; [[NSApp mainMenu] addItem:menuItem]; diff --git a/util/oslib-posix.c b/util/oslib-posix.c index aaec1891f5..8ec99ccb4f 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -46,6 +46,7 @@ #ifdef __FreeBSD__ #include <sys/sysctl.h> +#include <libutil.h> #endif #include "qemu/mmap-alloc.h" @@ -434,6 +435,32 @@ int qemu_read_password(char *buf, int buf_size) } +char *qemu_get_pid_name(pid_t pid) +{ + char *name = NULL; + +#if defined(__FreeBSD__) + /* BSDs don't have /proc, but they provide a nice substitute */ + struct kinfo_proc *proc = kinfo_getproc(pid); + + if (proc) { + name = g_strdup(proc->ki_comm); + free(proc); + } +#else + /* Assume a system with reasonable procfs */ + char *pid_path; + size_t len; + + pid_path = g_strdup_printf("/proc/%d/cmdline", pid); + g_file_get_contents(pid_path, &name, &len, NULL); + g_free(pid_path); +#endif + + return name; +} + + pid_t qemu_fork(Error **errp) { sigset_t oldmask, newmask; diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 4c1dcf1e66..d09863cc9d 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -575,6 +575,13 @@ int qemu_read_password(char *buf, int buf_size) } +char *qemu_get_pid_name(pid_t pid) +{ + /* XXX Implement me */ + abort(); +} + + pid_t qemu_fork(Error **errp) { errno = ENOSYS; diff --git a/util/qht.c b/util/qht.c index 16a8d7950e..ff4d2e6974 100644 --- a/util/qht.c +++ b/util/qht.c @@ -133,7 +133,8 @@ struct qht_map { /* trigger a resize when n_added_buckets > n_buckets / div */ #define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8 -static void qht_do_resize(struct qht *ht, struct qht_map *new); +static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, + bool reset); static void qht_grow_maybe(struct qht *ht); #ifdef QHT_DEBUG @@ -379,7 +380,7 @@ static void qht_bucket_reset__locked(struct qht_bucket *head) if (b->pointers[i] == NULL) { goto done; } - b->hashes[i] = 0; + atomic_set(&b->hashes[i], 0); atomic_set(&b->pointers[i], NULL); } b = b->next; @@ -408,12 +409,21 @@ void qht_reset(struct qht *ht) qht_map_unlock_buckets(map); } +static inline void qht_do_resize(struct qht *ht, struct qht_map *new) +{ + qht_do_resize_reset(ht, new, false); +} + +static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new) +{ + qht_do_resize_reset(ht, new, true); +} + bool qht_reset_size(struct qht *ht, size_t n_elems) { - struct qht_map *new; + struct qht_map *new = NULL; struct qht_map *map; size_t n_buckets; - bool resize = false; n_buckets = qht_elems_to_buckets(n_elems); @@ -421,18 +431,11 @@ bool qht_reset_size(struct qht *ht, size_t n_elems) map = ht->map; if (n_buckets != map->n_buckets) { new = qht_map_create(n_buckets); - resize = true; - } - - qht_map_lock_buckets(map); - qht_map_reset__all_locked(map); - if (resize) { - qht_do_resize(ht, new); } - qht_map_unlock_buckets(map); + qht_do_resize_and_reset(ht, new); qemu_mutex_unlock(&ht->lock); - return resize; + return !!new; } static inline @@ -444,7 +447,7 @@ void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func, do { for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { - if (b->hashes[i] == hash) { + if (atomic_read(&b->hashes[i]) == hash) { /* The pointer is dereferenced before seqlock_read_retry, * so (unlike qht_insert__locked) we need to use * atomic_rcu_read here. @@ -538,8 +541,8 @@ static bool qht_insert__locked(struct qht *ht, struct qht_map *map, if (new) { atomic_rcu_set(&prev->next, b); } - b->hashes[i] = hash; /* smp_wmb() implicit in seqlock_write_begin. */ + atomic_set(&b->hashes[i], hash); atomic_set(&b->pointers[i], p); seqlock_write_end(&head->sequence); return true; @@ -561,9 +564,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht) if (qht_map_needs_resize(map)) { struct qht_map *new = qht_map_create(map->n_buckets * 2); - qht_map_lock_buckets(map); qht_do_resize(ht, new); - qht_map_unlock_buckets(map); } qemu_mutex_unlock(&ht->lock); } @@ -607,10 +608,10 @@ qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) qht_debug_assert(to->pointers[i]); qht_debug_assert(from->pointers[j]); - to->hashes[i] = from->hashes[j]; + atomic_set(&to->hashes[i], from->hashes[j]); atomic_set(&to->pointers[i], from->pointers[j]); - from->hashes[j] = 0; + atomic_set(&from->hashes[j], 0); atomic_set(&from->pointers[j], NULL); } @@ -739,24 +740,31 @@ static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp) } /* - * Call with ht->lock and all bucket locks held. - * - * Creating the @new map here would add unnecessary delay while all the locks - * are held--holding up the bucket locks is particularly bad, since no writes - * can occur while these are held. Thus, we let callers create the new map, - * hopefully without the bucket locks held. + * Atomically perform a resize and/or reset. + * Call with ht->lock held. */ -static void qht_do_resize(struct qht *ht, struct qht_map *new) +static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) { struct qht_map *old; old = ht->map; - g_assert_cmpuint(new->n_buckets, !=, old->n_buckets); + qht_map_lock_buckets(old); + if (reset) { + qht_map_reset__all_locked(old); + } + + if (new == NULL) { + qht_map_unlock_buckets(old); + return; + } + + g_assert_cmpuint(new->n_buckets, !=, old->n_buckets); qht_map_iter__all_locked(ht, old, qht_map_copy, new); qht_map_debug__all_locked(new); atomic_rcu_set(&ht->map, new); + qht_map_unlock_buckets(old); call_rcu(old, qht_map_destroy, rcu); } @@ -768,12 +776,9 @@ bool qht_resize(struct qht *ht, size_t n_elems) qemu_mutex_lock(&ht->lock); if (n_buckets != ht->map->n_buckets) { struct qht_map *new; - struct qht_map *old = ht->map; new = qht_map_create(n_buckets); - qht_map_lock_buckets(old); qht_do_resize(ht, new); - qht_map_unlock_buckets(old); ret = true; } qemu_mutex_unlock(&ht->lock); @@ -1675,8 +1675,12 @@ static void qemu_kill_report(void) */ error_report("terminating on signal %d", shutdown_signal); } else { - error_report("terminating on signal %d from pid " FMT_pid, - shutdown_signal, shutdown_pid); + char *shutdown_cmd = qemu_get_pid_name(shutdown_pid); + + error_report("terminating on signal %d from pid " FMT_pid " (%s)", + shutdown_signal, shutdown_pid, + shutdown_cmd ? shutdown_cmd : "<unknown process>"); + g_free(shutdown_cmd); } shutdown_signal = -1; } |