28 files changed, 2709 insertions, 214 deletions
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 114d6578c4..89dd80ef86 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,13 +1,20 @@
 util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
-util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
-util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += compatfd.o
+util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += oslib-posix.o
+util-obj-$(CONFIG_POSIX) += qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
+util-obj-$(CONFIG_POSIX) += memfd.o
+util-obj-$(CONFIG_WIN32) += oslib-win32.o
+util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
 util-obj-y += envlist.o path.o module.o
 util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
 util-obj-y += acl.o
 util-obj-y += error.o qemu-error.o
-util-obj-$(CONFIG_POSIX) += compatfd.o
 util-obj-y += id.o
 util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
 util-obj-y += qemu-option.o qemu-progress.o
@@ -18,3 +25,8 @@ util-obj-y += getauxval.o
 util-obj-y += readline.o
 util-obj-y += rfifolock.o
 util-obj-y += rcu.o
+util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+util-obj-y += qemu-coroutine-sleep.o
+util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
+util-obj-y += buffer.o
+util-obj-y += timed-average.o
diff --git a/util/bitmap.c b/util/bitmap.c
index 300a68e38c..44f0f481be 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -14,7 +14,7 @@
 #include "qemu/atomic.h"
 
 /*
- * bitmaps provide an array of bits, implemented using an an
+ * bitmaps provide an array of bits, implemented using an
  * array of unsigned longs.  The number of valid bits in a
  * given bitmap does _not_ need to be an exact multiple of
  * BITS_PER_LONG.
diff --git a/util/buffer.c b/util/buffer.c
new file mode 100644
index 0000000000..8b27c08aac
--- /dev/null
+++ b/util/buffer.c
@@ -0,0 +1,171 @@
+/*
+ * QEMU generic buffers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/buffer.h"
+#include "trace.h"
+
+#define BUFFER_MIN_INIT_SIZE     4096
+#define BUFFER_MIN_SHRINK_SIZE  65536
+
+/* define the factor alpha for the expentional smoothing
+ * that is used in the average size calculation. a shift
+ * of 7 results in an alpha of 1/2^7. */
+#define BUFFER_AVG_SIZE_SHIFT       7
+
+static size_t buffer_req_size(Buffer *buffer, size_t len)
+{
+    return MAX(BUFFER_MIN_INIT_SIZE,
+               pow2ceil(buffer->offset + len));
+}
+
+static void buffer_adj_size(Buffer *buffer, size_t len)
+{
+    size_t old = buffer->capacity;
+    buffer->capacity = buffer_req_size(buffer, len);
+    buffer->buffer = g_realloc(buffer->buffer, buffer->capacity);
+    trace_buffer_resize(buffer->name ?: "unnamed",
+                        old, buffer->capacity);
+
+    /* make it even harder for the buffer to shrink, reset average size
+     * to currenty capacity if it is larger than the average. */
+    buffer->avg_size = MAX(buffer->avg_size,
+                           buffer->capacity << BUFFER_AVG_SIZE_SHIFT);
+}
+
+void buffer_init(Buffer *buffer, const char *name, ...)
+{
+    va_list ap;
+
+    va_start(ap, name);
+    buffer->name = g_strdup_vprintf(name, ap);
+    va_end(ap);
+}
+
+static uint64_t buffer_get_avg_size(Buffer *buffer)
+{
+    return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT;
+}
+
+void buffer_shrink(Buffer *buffer)
+{
+    size_t new;
+
+    /* Calculate the average size of the buffer as
+     * avg_size = avg_size * ( 1 - a ) + required_size * a
+     * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */
+    buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1;
+    buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT;
+    buffer->avg_size += buffer_req_size(buffer, 0);
+
+    /* And then only shrink if the average size of the buffer is much
+     * too big, to avoid bumping up & down the buffers all the time.
+     * realloc() isn't exactly cheap ...  */
+    new = buffer_req_size(buffer, buffer_get_avg_size(buffer));
+    if (new < buffer->capacity >> 3 &&
+        new >= BUFFER_MIN_SHRINK_SIZE) {
+        buffer_adj_size(buffer, buffer_get_avg_size(buffer));
+    }
+
+    buffer_adj_size(buffer, 0);
+}
+
+void buffer_reserve(Buffer *buffer, size_t len)
+{
+    if ((buffer->capacity - buffer->offset) < len) {
+        buffer_adj_size(buffer, len);
+    }
+}
+
+gboolean buffer_empty(Buffer *buffer)
+{
+    return buffer->offset == 0;
+}
+
+uint8_t *buffer_end(Buffer *buffer)
+{
+    return buffer->buffer + buffer->offset;
+}
+
+void buffer_reset(Buffer *buffer)
+{
+    buffer->offset = 0;
+    buffer_shrink(buffer);
+}
+
+void buffer_free(Buffer *buffer)
+{
+    trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity);
+    g_free(buffer->buffer);
+    g_free(buffer->name);
+    buffer->offset = 0;
+    buffer->capacity = 0;
+    buffer->buffer = NULL;
+    buffer->name = NULL;
+}
+
+void buffer_append(Buffer *buffer, const void *data, size_t len)
+{
+    memcpy(buffer->buffer + buffer->offset, data, len);
+    buffer->offset += len;
+}
+
+void buffer_advance(Buffer *buffer, size_t len)
+{
+    memmove(buffer->buffer, buffer->buffer + len,
+            (buffer->offset - len));
+    buffer->offset -= len;
+    buffer_shrink(buffer);
+}
+
+void buffer_move_empty(Buffer *to, Buffer *from)
+{
+    trace_buffer_move_empty(to->name ?: "unnamed",
+                            from->offset,
+                            from->name ?: "unnamed");
+    assert(to->offset == 0);
+
+    g_free(to->buffer);
+    to->offset = from->offset;
+    to->capacity = from->capacity;
+    to->buffer = from->buffer;
+
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
+
+void buffer_move(Buffer *to, Buffer *from)
+{
+    if (to->offset == 0) {
+        buffer_move_empty(to, from);
+        return;
+    }
+
+    trace_buffer_move(to->name ?: "unnamed",
+                      from->offset,
+                      from->name ?: "unnamed");
+    buffer_reserve(to, from->offset);
+    buffer_append(to, from->buffer, from->offset);
+
+    g_free(from->buffer);
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
diff --git a/util/coroutine-gthread.c b/util/coroutine-gthread.c
new file mode 100644
index 0000000000..0bcd77867d
--- /dev/null
+++ b/util/coroutine-gthread.c
@@ -0,0 +1,198 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+    Coroutine base;
+    GThread *thread;
+    bool runnable;
+    bool free_on_thread_exit;
+    CoroutineAction action;
+} CoroutineGThread;
+
+static CompatGMutex coroutine_lock;
+static CompatGCond coroutine_cond;
+
+/* GLib 2.31 and beyond deprecated various parts of the thread API,
+ * but the new interfaces are not available in older GLib versions
+ * so we have to cope with both.
+ */
+#if GLIB_CHECK_VERSION(2, 31, 0)
+/* Awkwardly, the GPrivate API doesn't provide a way to update the
+ * GDestroyNotify handler for the coroutine key dynamically. So instead
+ * we track whether or not the CoroutineGThread should be freed on
+ * thread exit / coroutine key update using the free_on_thread_exit
+ * field.
+ */
+static void coroutine_destroy_notify(gpointer data)
+{
+    CoroutineGThread *co = data;
+    if (co && co->free_on_thread_exit) {
+        g_free(co);
+    }
+}
+
+static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+    return g_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+                                     bool free_on_thread_exit)
+{
+    /* Unlike g_static_private_set() this does not call the GDestroyNotify
+     * if the previous value of the key was NULL. Fortunately we only need
+     * the GDestroyNotify in the non-NULL key case.
+     */
+    co->free_on_thread_exit = free_on_thread_exit;
+    g_private_replace(&coroutine_key, co);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+    return g_thread_new("coroutine", func, data);
+}
+
+#else
+
+/* Handle older GLib versions */
+
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+    return g_static_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+                                     bool free_on_thread_exit)
+{
+    g_static_private_set(&coroutine_key, co,
+                         free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+    return g_thread_create_full(func, data, 0, TRUE, TRUE,
+                                G_THREAD_PRIORITY_NORMAL, NULL);
+}
+
+#endif
+
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+    if (!g_thread_supported()) {
+        g_thread_init(NULL);
+    }
+#endif
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+    while (!co->runnable) {
+        g_cond_wait(&coroutine_cond, &coroutine_lock);
+    }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+    g_mutex_lock(&coroutine_lock);
+    coroutine_wait_runnable_locked(co);
+    g_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+    CoroutineGThread *co = opaque;
+
+    set_coroutine_key(co, false);
+    coroutine_wait_runnable(co);
+    co->base.entry(co->base.entry_arg);
+    qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+    return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineGThread *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->thread = create_thread(coroutine_thread, co);
+    if (!co->thread) {
+        g_free(co);
+        return NULL;
+    }
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+    g_thread_join(co->thread);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+                                      Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+    CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+    g_mutex_lock(&coroutine_lock);
+    from->runnable = false;
+    from->action = action;
+    to->runnable = true;
+    to->action = action;
+    g_cond_broadcast(&coroutine_cond);
+
+    if (action != COROUTINE_TERMINATE) {
+        coroutine_wait_runnable_locked(from);
+    }
+    g_mutex_unlock(&coroutine_lock);
+    return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineGThread *co = get_coroutine_key();
+    if (!co) {
+        co = g_malloc0(sizeof(*co));
+        co->runnable = true;
+        set_coroutine_key(co, true);
+    }
+
+    return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineGThread *co = get_coroutine_key();
+
+    return co && co->base.caller;
+}
diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c
new file mode 100644
index 0000000000..39842a4a90
--- /dev/null
+++ b/util/coroutine-sigaltstack.c
@@ -0,0 +1,293 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    sigjmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+
+    /** Information for the signal handler (trampoline) */
+    sigjmp_buf tr_reenter;
+    volatile sig_atomic_t tr_called;
+    void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineUContext *self;
+    Coroutine *co;
+    CoroutineThreadState *coTS;
+
+    /* Get the thread specific information */
+    coTS = coroutine_get_thread_state();
+    self = coTS->tr_handler;
+    coTS->tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a siglongjmp here again.
+     */
+    if (!sigsetjmp(coTS->tr_reenter, 0)) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has siglongjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    CoroutineThreadState *coTS;
+    struct sigaction sa;
+    struct sigaction osa;
+    stack_t ss;
+    stack_t oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    sigjmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put sigsetjmp/siglongjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    coTS = coroutine_get_thread_state();
+    coTS->tr_handler = co;
+
+    /*
+     * Preserve the SIGUSR2 signal state, block SIGUSR2,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR2);
+    pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the sigsetjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    coTS->tr_called = 0;
+    pthread_kill(pthread_self(), SIGUSR2);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR2);
+    while (!coTS->tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR2 signal handler and mask
+     */
+    sigaction(SIGUSR2, &osa, NULL);
+    pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmetic is necessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!sigsetjmp(old_env, 0)) {
+        siglongjmp(coTS->tr_reenter, 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        siglongjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
new file mode 100644
index 0000000000..26cbebb7a7
--- /dev/null
+++ b/util/coroutine-ucontext.c
@@ -0,0 +1,194 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+    unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+    void *p;
+    int i[2];
+};
+
+static void coroutine_trampoline(int i0, int i1)
+{
+    union cc_arg arg;
+    CoroutineUContext *self;
+    Coroutine *co;
+
+    arg.i[0] = i0;
+    arg.i[1] = i1;
+    self = arg.p;
+    co = &self->base;
+
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    ucontext_t old_uc, uc;
+    sigjmp_buf old_env;
+    union cc_arg arg = {0};
+
+    /* The ucontext functions preserve signal masks which incurs a
+     * system call overhead.  sigsetjmp(buf, 0)/siglongjmp() does not
+     * preserve signal masks but only works on the current stack.
+     * Since we need a way to create and switch to a new stack, use
+     * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+     * everything else.
+     */
+
+    if (getcontext(&uc) == -1) {
+        abort();
+    }
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    uc.uc_link = &old_uc;
+    uc.uc_stack.ss_sp = co->stack;
+    uc.uc_stack.ss_size = stack_size;
+    uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+    co->valgrind_stack_id =
+        VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+    arg.p = co;
+
+    makecontext(&uc, (void (*)(void))coroutine_trampoline,
+                2, arg.i[0], arg.i[1]);
+
+    /* swapcontext() in, siglongjmp() back out */
+    if (!sigsetjmp(old_env, 0)) {
+        swapcontext(&old_uc, &uc);
+    }
+    return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+    VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+    valgrind_stack_deregister(co);
+#endif
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    int ret;
+
+    current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        siglongjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
new file mode 100644
index 0000000000..990a3b3b0c
--- /dev/null
+++ b/util/coroutine-win32.c
@@ -0,0 +1,104 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct
+{
+    Coroutine base;
+
+    LPVOID fiber;
+    CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+    current = to_;
+
+    to->action = action;
+    SwitchToFiber(to->fiber);
+    return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+    Coroutine *co = co_;
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineWin32 *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+
+    g_assert(co->fiber);
+
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+    DeleteFiber(co->fiber);
+    g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+        leader.fiber = ConvertThreadToFiber(NULL);
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/util/cutils.c b/util/cutils.c
index 5d1c9ebe05..cfeb848d19 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -145,11 +145,6 @@ time_t mktimegm(struct tm *tm)
     return t;
 }
 
-int qemu_fls(int i)
-{
-    return 32 - clz32(i);
-}
-
 /*
  * Make sure data goes on disk, but if possible do not bother to
  * write out the inode just for timestamp updates.
@@ -281,19 +276,19 @@ int fcntl_setfl(int fd, int flag)
 static int64_t suffix_mul(char suffix, int64_t unit)
 {
     switch (qemu_toupper(suffix)) {
-    case STRTOSZ_DEFSUFFIX_B:
+    case QEMU_STRTOSZ_DEFSUFFIX_B:
         return 1;
-    case STRTOSZ_DEFSUFFIX_KB:
+    case QEMU_STRTOSZ_DEFSUFFIX_KB:
         return unit;
-    case STRTOSZ_DEFSUFFIX_MB:
+    case QEMU_STRTOSZ_DEFSUFFIX_MB:
         return unit * unit;
-    case STRTOSZ_DEFSUFFIX_GB:
+    case QEMU_STRTOSZ_DEFSUFFIX_GB:
         return unit * unit * unit;
-    case STRTOSZ_DEFSUFFIX_TB:
+    case QEMU_STRTOSZ_DEFSUFFIX_TB:
         return unit * unit * unit * unit;
-    case STRTOSZ_DEFSUFFIX_PB:
+    case QEMU_STRTOSZ_DEFSUFFIX_PB:
         return unit * unit * unit * unit * unit;
-    case STRTOSZ_DEFSUFFIX_EB:
+    case QEMU_STRTOSZ_DEFSUFFIX_EB:
         return unit * unit * unit * unit * unit * unit;
     }
     return -1;
@@ -305,7 +300,7 @@ static int64_t suffix_mul(char suffix, int64_t unit)
  * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on
  * other error.
  */
-int64_t strtosz_suffix_unit(const char *nptr, char **end,
+int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end,
                             const char default_suffix, int64_t unit)
 {
     int64_t retval = -EINVAL;
@@ -348,14 +343,165 @@ fail:
     return retval;
 }
 
-int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix)
+int64_t qemu_strtosz_suffix(const char *nptr, char **end,
+                            const char default_suffix)
 {
-    return strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+    return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024);
 }
 
-int64_t strtosz(const char *nptr, char **end)
+int64_t qemu_strtosz(const char *nptr, char **end)
+{
+    return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB);
+}
+
+/**
+ * Helper function for qemu_strto*l() functions.
+ */
+static int check_strtox_error(const char *p, char *endptr, const char **next,
+                              int err)
+{
+    /* If no conversion was performed, prefer BSD behavior over glibc
+     * behavior.
+     */
+    if (err == 0 && endptr == p) {
+        err = EINVAL;
+    }
+    if (!next && *endptr) {
+        return -EINVAL;
+    }
+    if (next) {
+        *next = endptr;
+    }
+    return -err;
+}
+
+/**
+ * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions.
+ *
+ * Convert ASCII string @nptr to a long integer value
+ * from the given @base. Parameters @nptr, @endptr, @base
+ * follows same semantics as strtol() C function.
+ *
+ * Unlike from strtol() function, if @endptr is not NULL, this
+ * function will return -EINVAL whenever it cannot fully convert
+ * the string in @nptr with given @base to a long. This function returns
+ * the result of the conversion only through the @result parameter.
+ *
+ * If NULL is passed in @endptr, then the whole string in @ntpr
+ * is a number otherwise it returns -EINVAL.
+ *
+ * RETURN VALUE
+ * Unlike from strtol() function, this wrapper returns either
+ * -EINVAL or the errno set by strtol() function (e.g -ERANGE).
+ * If the conversion overflows, -ERANGE is returned, and @result
+ * is set to the max value of the desired type
+ * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case
+ * of underflow, -ERANGE is returned, and @result is set to the min
+ * value of the desired type. For strtol(), strtoll(), @result is set to
+ * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it
+ * is set to 0.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+                long *result)
 {
-    return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtol(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long integer.
+ *
+ * If string contains a negative number, value will be converted to
+ * the unsigned representation of the signed value, unless the original
+ * (nonnegated) value would overflow, in this case, it will set @result
+ * to ULONG_MAX, and return ERANGE.
+ *
+ * The same behavior holds, for qemu_strtoull() but sets @result to
+ * ULLONG_MAX instead of ULONG_MAX.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+                 unsigned long *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoul(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to a long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+                 int64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoll(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+                  uint64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoull(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
 }
 
 /**
@@ -474,29 +620,6 @@ int qemu_parse_fd(const char *param)
     return fd;
 }
 
-/* round down to the nearest power of 2*/
-int64_t pow2floor(int64_t value)
-{
-    if (!is_power_of_2(value)) {
-        value = 0x8000000000000000ULL >> clz64(value);
-    }
-    return value;
-}
-
-/* round up to the nearest power of 2 (0 if overflow) */
-uint64_t pow2ceil(uint64_t value)
-{
-    uint8_t nlz = clz64(value);
-
-    if (is_power_of_2(value)) {
-        return value;
-    }
-    if (!nlz) {
-        return 0;
-    }
-    return 1ULL << (64 - nlz);
-}
-
 /*
  * Implementation of  ULEB128 (http://en.wikipedia.org/wiki/LEB128)
  * Input is limited to 14-bit numbers
diff --git a/util/error.c b/util/error.c
index 14f4351879..80c89a2079 100644
--- a/util/error.c
+++ b/util/error.c
@@ -2,9 +2,11 @@
  * QEMU Error Objects
  *
  * Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
  *
  * Authors:
  *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Markus Armbruster <armbru@redhat.com>,
  *
  * This work is licensed under the terms of the GNU LGPL, version 2.  See
  * the COPYING.LIB file in the top-level directory.
@@ -18,14 +20,33 @@ struct Error
 {
     char *msg;
     ErrorClass err_class;
+    const char *src, *func;
+    int line;
+    GString *hint;
 };
 
 Error *error_abort;
+Error *error_fatal;
 
-void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
+static void error_handle_fatal(Error **errp, Error *err)
+{
+    if (errp == &error_abort) {
+        fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+                err->func, err->src, err->line);
+        error_report_err(err);
+        abort();
+    }
+    if (errp == &error_fatal) {
+        error_report_err(err);
+        exit(1);
+    }
+}
+
+static void error_setv(Error **errp,
+                       const char *src, int line, const char *func,
+                       ErrorClass err_class, const char *fmt, va_list ap)
 {
     Error *err;
-    va_list ap;
     int saved_errno = errno;
 
     if (errp == NULL) {
@@ -34,99 +55,120 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
     assert(*errp == NULL);
 
     err = g_malloc0(sizeof(*err));
-
-    va_start(ap, fmt);
     err->msg = g_strdup_vprintf(fmt, ap);
-    va_end(ap);
     err->err_class = err_class;
+    err->src = src;
+    err->line = line;
+    err->func = func;
 
-    if (errp == &error_abort) {
-        error_report_err(err);
-        abort();
-    }
-
+    error_handle_fatal(errp, err);
     *errp = err;
 
     errno = saved_errno;
 }
 
-void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
-                     const char *fmt, ...)
+void error_set_internal(Error **errp,
+                        const char *src, int line, const char *func,
+                        ErrorClass err_class, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, err_class, fmt, ap);
+    va_end(ap);
+}
+
+void error_setg_internal(Error **errp,
+                         const char *src, int line, const char *func,
+                         const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+    va_end(ap);
+}
+
+void error_setg_errno_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int os_errno, const char *fmt, ...)
 {
-    Error *err;
-    char *msg1;
     va_list ap;
+    char *msg;
     int saved_errno = errno;
 
     if (errp == NULL) {
         return;
     }
-    assert(*errp == NULL);
-
-    err = g_malloc0(sizeof(*err));
 
     va_start(ap, fmt);
-    msg1 = g_strdup_vprintf(fmt, ap);
-    if (os_errno != 0) {
-        err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
-        g_free(msg1);
-    } else {
-        err->msg = msg1;
-    }
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
     va_end(ap);
-    err->err_class = err_class;
 
-    if (errp == &error_abort) {
-        error_report_err(err);
-        abort();
+    if (os_errno != 0) {
+        msg = (*errp)->msg;
+        (*errp)->msg = g_strdup_printf("%s: %s", msg, strerror(os_errno));
+        g_free(msg);
     }
 
-    *errp = err;
-
     errno = saved_errno;
 }
 
-void error_setg_file_open(Error **errp, int os_errno, const char *filename)
+void error_setg_file_open_internal(Error **errp,
+                                   const char *src, int line, const char *func,
+                                   int os_errno, const char *filename)
 {
-    error_setg_errno(errp, os_errno, "Could not open '%s'", filename);
+    error_setg_errno_internal(errp, src, line, func, os_errno,
+                              "Could not open '%s'", filename);
+}
+
+void error_append_hint(Error **errp, const char *fmt, ...)
+{
+    va_list ap;
+    int saved_errno = errno;
+    Error *err;
+
+    if (!errp) {
+        return;
+    }
+    err = *errp;
+    assert(err && errp != &error_abort);
+
+    if (!err->hint) {
+        err->hint = g_string_new(NULL);
+    }
+    va_start(ap, fmt);
+    g_string_append_vprintf(err->hint, fmt, ap);
+    va_end(ap);
+
+    errno = saved_errno;
 }
 
 #ifdef _WIN32
 
-void error_set_win32(Error **errp, int win32_err, ErrorClass err_class,
-                     const char *fmt, ...)
+void error_setg_win32_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int win32_err, const char *fmt, ...)
 {
-    Error *err;
-    char *msg1;
     va_list ap;
+    char *msg1, *msg2;
 
     if (errp == NULL) {
         return;
     }
-    assert(*errp == NULL);
-
-    err = g_malloc0(sizeof(*err));
 
     va_start(ap, fmt);
-    msg1 = g_strdup_vprintf(fmt, ap);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+    va_end(ap);
+
     if (win32_err != 0) {
-        char *msg2 = g_win32_error_message(win32_err);
-        err->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
-                                   (unsigned)win32_err);
+        msg1 = (*errp)->msg;
+        msg2 = g_win32_error_message(win32_err);
+        (*errp)->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
+                                       (unsigned)win32_err);
         g_free(msg2);
         g_free(msg1);
-    } else {
-        err->msg = msg1;
-    }
-    va_end(ap);
-    err->err_class = err_class;
-
-    if (errp == &error_abort) {
-        error_report_err(err);
-        abort();
     }
-
-    *errp = err;
 }
 
 #endif
@@ -138,6 +180,12 @@ Error *error_copy(const Error *err)
     err_new = g_malloc0(sizeof(*err));
     err_new->msg = g_strdup(err->msg);
     err_new->err_class = err->err_class;
+    err_new->src = err->src;
+    err_new->line = err->line;
+    err_new->func = err->func;
+    if (err->hint) {
+        err_new->hint = g_string_new(err->hint->str);
+    }
 
     return err_new;
 }
@@ -155,6 +203,9 @@ const char *error_get_pretty(Error *err)
 void error_report_err(Error *err)
 {
     error_report("%s", error_get_pretty(err));
+    if (err->hint) {
+        error_printf_unless_qmp("%s\n", err->hint->str);
+    }
     error_free(err);
 }
 
@@ -162,18 +213,29 @@ void error_free(Error *err)
 {
     if (err) {
         g_free(err->msg);
+        if (err->hint) {
+            g_string_free(err->hint, true);
+        }
         g_free(err);
     }
 }
 
+void error_free_or_abort(Error **errp)
+{
+    assert(errp && *errp);
+    error_free(*errp);
+    *errp = NULL;
+}
+
 void error_propagate(Error **dst_errp, Error *local_err)
 {
-    if (local_err && dst_errp == &error_abort) {
-        error_report_err(local_err);
-        abort();
-    } else if (dst_errp && !*dst_errp) {
+    if (!local_err) {
+        return;
+    }
+    error_handle_fatal(dst_errp, local_err);
+    if (dst_errp && !*dst_errp) {
         *dst_errp = local_err;
-    } else if (local_err) {
+    } else {
         error_free(local_err);
     }
 }
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index ed4ca2b01e..d4a0c63e12 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -77,7 +77,7 @@ void event_notifier_cleanup(EventNotifier *e)
     close(e->wfd);
 }
 
-int event_notifier_get_fd(EventNotifier *e)
+int event_notifier_get_fd(const EventNotifier *e)
 {
     return e->rfd;
 }
diff --git a/util/id.c b/util/id.c
index 09b22fb8fa..7883fbec79 100644
--- a/util/id.c
+++ b/util/id.c
@@ -26,3 +26,40 @@ bool id_wellformed(const char *id)
     }
     return true;
 }
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[ID_MAX] = {
+    [ID_QDEV]  = "qdev",
+    [ID_BLOCK] = "block",
+};
+
+/*
+ *  Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ *  where:
+ *
+ *  - PREFIX is the reserved character '#'
+ *  - SUBSYSTEM identifies the subsystem creating the ID
+ *  - NUMBER is a decimal number unique within SUBSYSTEM.
+ *
+ *    Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+    static uint64_t id_counters[ID_MAX];
+    uint32_t rnd;
+
+    assert(id < ARRAY_SIZE(id_subsys_str));
+    assert(id_subsys_str[id]);
+
+    rnd = g_random_int_range(0, 100);
+
+    return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+                                                        id_subsys_str[id],
+                                                        id_counters[id]++,
+                                                        rnd);
+}
diff --git a/util/memfd.c b/util/memfd.c
new file mode 100644
index 0000000000..7c406914c5
--- /dev/null
+++ b/util/memfd.c
@@ -0,0 +1,162 @@
+/*
+ * memfd.c
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include <glib.h>
+#include <glib/gprintf.h>
+
+#include <sys/mman.h>
+
+#include "qemu/memfd.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/memfd.h>
+#elif defined CONFIG_LINUX
+#include <sys/syscall.h>
+#include <asm/unistd.h>
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+#ifdef __NR_memfd_create
+    return syscall(__NR_memfd_create, name, flags);
+#else
+    return -1;
+#endif
+}
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+/*
+ * This is a best-effort helper for shared memory allocation, with
+ * optional sealing. The helper will do his best to allocate using
+ * memfd with sealing, but may fallback on other methods without
+ * sealing.
+ */
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+                       int *fd)
+{
+    void *ptr;
+    int mfd = -1;
+
+    *fd = -1;
+
+#ifdef CONFIG_LINUX
+    if (seals) {
+        mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+    }
+
+    if (mfd == -1) {
+        /* some systems have memfd without sealing */
+        mfd = memfd_create(name, MFD_CLOEXEC);
+        seals = 0;
+    }
+#endif
+
+    if (mfd != -1) {
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+
+        if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+            perror("fcntl");
+            close(mfd);
+            return NULL;
+        }
+    } else {
+        const char *tmpdir = g_get_tmp_dir();
+        gchar *fname;
+
+        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+        mfd = mkstemp(fname);
+        unlink(fname);
+        g_free(fname);
+
+        if (mfd == -1) {
+            perror("mkstemp");
+            return NULL;
+        }
+
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+    if (ptr == MAP_FAILED) {
+        perror("mmap");
+        close(mfd);
+        return NULL;
+    }
+
+    *fd = mfd;
+    return ptr;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+    if (ptr) {
+        munmap(ptr, size);
+    }
+
+    if (fd != -1) {
+        close(fd);
+    }
+}
+
+enum {
+    MEMFD_KO,
+    MEMFD_OK,
+    MEMFD_TODO
+};
+
+bool qemu_memfd_check(void)
+{
+    static int memfd_check = MEMFD_TODO;
+
+    if (memfd_check == MEMFD_TODO) {
+        int fd;
+        void *ptr;
+
+        ptr = qemu_memfd_alloc("test", 4096, 0, &fd);
+        memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
+        qemu_memfd_free(ptr, 4096, fd);
+    }
+
+    return memfd_check == MEMFD_OK;
+}
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
new file mode 100644
index 0000000000..54793a5dcf
--- /dev/null
+++ b/util/mmap-alloc.c
@@ -0,0 +1,110 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd != -1) {
+        do {
+            ret = fstatfs(fd, &fs);
+        } while (ret != 0 && errno == EINTR);
+
+        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+            return fs.f_bsize;
+        }
+    }
+#endif
+
+    return getpagesize();
+}
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    size_t total = size + align;
+#if defined(__powerpc64__) && defined(__linux__)
+    /* On ppc64 mappings in the same segment (aka slice) must share the same
+     * page size. Since we will be re-allocating part of this segment
+     * from the supplied fd, we should make sure to use the same page size,
+     * unless we are using the system page size, in which case anonymous memory
+     * is OK. Use align as a hint for the page size.
+     * In this case, set MAP_NORESERVE to avoid allocating backing store memory.
+     */
+    int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
+    int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
+    void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
+#else
+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr1;
+
+    if (ptr == MAP_FAILED) {
+        return MAP_FAILED;
+    }
+
+    /* Make sure align is a power of 2 */
+    assert(!(align & (align - 1)));
+    /* Always align to host page size */
+    assert(align >= getpagesize());
+
+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+                MAP_FIXED |
+                (fd == -1 ? MAP_ANONYMOUS : 0) |
+                (shared ? MAP_SHARED : MAP_PRIVATE),
+                fd, 0);
+    if (ptr1 == MAP_FAILED) {
+        munmap(ptr, total);
+        return MAP_FAILED;
+    }
+
+    ptr += offset;
+    total -= offset;
+
+    if (offset > 0) {
+        munmap(ptr - offset, offset);
+    }
+
+    /*
+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+     * a guard page guarding against potential buffer overflows.
+     */
+    if (total > size + getpagesize()) {
+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
+    }
+
+    return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+    if (ptr) {
+        /* Unmap both the RAM block and the guard page */
+        munmap(ptr, size + getpagesize());
+    }
+}
diff --git a/util/osdep.c b/util/osdep.c
index 0092bb61b9..534b51147c 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -52,7 +52,14 @@ extern int madvise(caddr_t, size_t, int);
 
 static bool fips_enabled = false;
 
-static const char *qemu_version = QEMU_VERSION;
+/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
+ * instead of QEMU_VERSION, so setting hw_version on MachineClass
+ * is no longer mandatory.
+ *
+ * Do NOT change this string, or it will break compatibility on all
+ * machine classes that don't set hw_version.
+ */
+static const char *hw_version = "2.5+";
 
 int socket_set_cork(int fd, int v)
 {
@@ -311,14 +318,14 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
     return ret;
 }
 
-void qemu_set_version(const char *version)
+void qemu_set_hw_version(const char *version)
 {
-    qemu_version = version;
+    hw_version = version;
 }
 
-const char *qemu_get_version(void)
+const char *qemu_hw_version(void)
 {
-    return qemu_version;
+    return hw_version;
 }
 
 void fips_set_state(bool requested)
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 997f24d21b..6f18866cad 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -46,7 +46,6 @@ extern int daemon(int, int);
 #else
 #  define QEMU_VMALLOC_ALIGN getpagesize()
 #endif
-#define HUGETLBFS_MAGIC       0x958458f6
 
 #include <termios.h>
 #include <unistd.h>
@@ -66,13 +65,14 @@ extern int daemon(int, int);
 
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
-#include <sys/vfs.h>
 #endif
 
 #ifdef __FreeBSD__
 #include <sys/sysctl.h>
 #endif
 
+#include <qemu/mmap-alloc.h>
+
 #ifdef CONFIG_MARU
 #include "../../tizen/src/emulator_common.h"
 #endif
@@ -148,10 +148,7 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
     }
 #endif
     size_t align = QEMU_VMALLOC_ALIGN;
-    size_t total = size + align - getpagesize();
-    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
-                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr = qemu_ram_mmap(-1, size, align, false);
 
     if (ptr == MAP_FAILED) {
         return NULL;
@@ -160,15 +157,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
     if (alignment) {
         *alignment = align;
     }
-    ptr += offset;
-    total -= offset;
-
-    if (offset > 0) {
-        munmap(ptr - offset, offset);
-    }
-    if (total > size) {
-        munmap(ptr + size, total - size);
-    }
 
     trace_qemu_anon_ram_alloc(size, ptr);
     return ptr;
@@ -183,9 +171,7 @@ void qemu_vfree(void *ptr)
 void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
-    if (ptr) {
-        munmap(ptr, size);
-    }
+    qemu_ram_munmap(ptr, size);
 }
 
 void qemu_set_block(int fd)
@@ -372,26 +358,6 @@ static void sigbus_handler(int signal)
     siglongjmp(sigjump, 1);
 }
 
-static size_t fd_getpagesize(int fd)
-{
-#ifdef CONFIG_LINUX
-    struct statfs fs;
-    int ret;
-
-    if (fd != -1) {
-        do {
-            ret = fstatfs(fd, &fs);
-        } while (ret != 0 && errno == EINTR);
-
-        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
-            return fs.f_bsize;
-        }
-    }
-#endif
-
-    return getpagesize();
-}
-
 void os_mem_prealloc(int fd, char *area, size_t memory)
 {
     int ret;
@@ -419,7 +385,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory)
         exit(1);
     } else {
         int i;
-        size_t hpagesize = fd_getpagesize(fd);
+        size_t hpagesize = qemu_fd_getpagesize(fd);
         size_t numpages = DIV_ROUND_UP(memory, hpagesize);
 
         /* MAP_POPULATE silently ignores failures */
@@ -502,3 +468,74 @@ int qemu_read_password(char *buf, int buf_size)
     printf("\n");
     return ret;
 }
+
+
+pid_t qemu_fork(Error **errp)
+{
+    sigset_t oldmask, newmask;
+    struct sigaction sig_action;
+    int saved_errno;
+    pid_t pid;
+
+    /*
+     * Need to block signals now, so that child process can safely
+     * kill off caller's signal handlers without a race.
+     */
+    sigfillset(&newmask);
+    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
+        error_setg_errno(errp, errno,
+                         "cannot block signals");
+        return -1;
+    }
+
+    pid = fork();
+    saved_errno = errno;
+
+    if (pid < 0) {
+        /* attempt to restore signal mask, but ignore failure, to
+         * avoid obscuring the fork failure */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+        error_setg_errno(errp, saved_errno,
+                         "cannot fork child process");
+        errno = saved_errno;
+        return -1;
+    } else if (pid) {
+        /* parent process */
+
+        /* Restore our original signal mask now that the child is
+         * safely running. Only documented failures are EFAULT (not
+         * possible, since we are using just-grabbed mask) or EINVAL
+         * (not possible, since we are using correct arguments).  */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+    } else {
+        /* child process */
+        size_t i;
+
+        /* Clear out all signal handlers from parent so nothing
+         * unexpected can happen in our child once we unblock
+         * signals */
+        sig_action.sa_handler = SIG_DFL;
+        sig_action.sa_flags = 0;
+        sigemptyset(&sig_action.sa_mask);
+
+        for (i = 1; i < NSIG; i++) {
+            /* Only possible errors are EFAULT or EINVAL The former
+             * won't happen, the latter we expect, so no need to check
+             * return value */
+            (void)sigaction(i, &sig_action, NULL);
+        }
+
+        /* Unmask all signals in child, since we've no idea what the
+         * caller's done with their signal mask and don't want to
+         * propagate that to children */
+        sigemptyset(&newmask);
+        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
+            Error *local_err = NULL;
+            error_setg_errno(&local_err, errno,
+                             "cannot unblock signals");
+            error_report_err(local_err);
+            _exit(1);
+        }
+    }
+    return pid;
+}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 217f28a201..11f528a4df 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -110,9 +110,7 @@ void qemu_anon_ram_free(void *ptr, size_t size)
     }
 }
 
-// CONFIG_MARU MODIFICATION
-// recent MinGW-w64 provides gmtime_r(), localtime_r(). we use them.
-#if 0
+#ifndef CONFIG_LOCALTIME_R
 /* FIXME: add proper locking */
 struct tm *gmtime_r(const time_t *timep, struct tm *result)
 {
@@ -136,7 +134,7 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
     }
     return p;
 }
-#endif
+#endif /* CONFIG_LOCALTIME_R */
 
 void qemu_set_block(int fd)
 {
@@ -471,7 +469,7 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout)
     return retval;
 }
 
-size_t getpagesize(void)
+int getpagesize(void)
 {
     SYSTEM_INFO system_info;
 
@@ -513,3 +511,12 @@ int qemu_read_password(char *buf, int buf_size)
     buf[i] = '\0';
     return 0;
 }
+
+
+pid_t qemu_fork(Error **errp)
+{
+    errno = ENOSYS;
+    error_setg_errno(errp, errno,
+                     "cannot fork child process");
+    return -1;
+}
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
new file mode 100644
index 0000000000..e1eae7331e
--- /dev/null
+++ b/util/qemu-coroutine-io.c
@@ -0,0 +1,91 @@
+/*
+ * Coroutine-aware I/O functions
+ *
+ * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
+ * Copyright (c) 2011, Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                    size_t offset, size_t bytes, bool do_send)
+{
+    size_t done = 0;
+    ssize_t ret;
+    int err;
+    while (done < bytes) {
+        ret = iov_send_recv(sockfd, iov, iov_cnt,
+                            offset + done, bytes - done, do_send);
+        if (ret > 0) {
+            done += ret;
+        } else if (ret < 0) {
+            err = socket_error();
+            if (err == EAGAIN || err == EWOULDBLOCK) {
+                qemu_coroutine_yield();
+            } else if (done == 0) {
+                return -err;
+            } else {
+                break;
+            }
+        } else if (ret == 0 && !do_send) {
+            /* write (send) should never return 0.
+             * read (recv) returns 0 for end-of-file (-data).
+             * In both cases there's little point retrying,
+             * but we do for write anyway, just in case */
+            break;
+        }
+    }
+    return done;
+}
+
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
+{
+    struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+    return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
+}
+
+typedef struct {
+    Coroutine *co;
+    int fd;
+} FDYieldUntilData;
+
+static void fd_coroutine_enter(void *opaque)
+{
+    FDYieldUntilData *data = opaque;
+    qemu_set_fd_handler(data->fd, NULL, NULL, NULL);
+    qemu_coroutine_enter(data->co, NULL);
+}
+
+void coroutine_fn yield_until_fd_readable(int fd)
+{
+    FDYieldUntilData data;
+
+    assert(qemu_in_coroutine());
+    data.co = qemu_coroutine_self();
+    data.fd = fd;
+    qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data);
+    qemu_coroutine_yield();
+}
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
new file mode 100644
index 0000000000..130ee19d17
--- /dev/null
+++ b/util/qemu-coroutine-lock.c
@@ -0,0 +1,186 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+    QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+    Coroutine *self = qemu_coroutine_self();
+    QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    qemu_coroutine_yield();
+    assert(qemu_in_coroutine());
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all().  This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+    Coroutine *next;
+
+    trace_qemu_co_queue_run_restart(co);
+    while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
+        QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
+        qemu_coroutine_enter(next, NULL);
+    }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *next;
+
+    if (QTAILQ_EMPTY(&queue->entries)) {
+        return false;
+    }
+
+    while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
+        QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+        QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
+        trace_qemu_co_queue_next(next);
+        if (single) {
+            break;
+        }
+    }
+    return true;
+}
+
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
+{
+    assert(qemu_in_coroutine());
+    return qemu_co_queue_do_restart(queue, true);
+}
+
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
+{
+    assert(qemu_in_coroutine());
+    qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_enter_next(CoQueue *queue)
+{
+    Coroutine *next;
+
+    next = QTAILQ_FIRST(&queue->entries);
+    if (!next) {
+        return false;
+    }
+
+    QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+    qemu_coroutine_enter(next, NULL);
+    return true;
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return QTAILQ_FIRST(&queue->entries) == NULL;
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof(*mutex));
+    qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_lock_entry(mutex, self);
+
+    while (mutex->locked) {
+        qemu_co_queue_wait(&mutex->queue);
+    }
+
+    mutex->locked = true;
+
+    trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+    assert(mutex->locked == true);
+    assert(qemu_in_coroutine());
+
+    mutex->locked = false;
+    qemu_co_queue_next(&mutex->queue);
+
+    trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+    memset(lock, 0, sizeof(*lock));
+    qemu_co_queue_init(&lock->queue);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+    while (lock->writer) {
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->reader++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+    assert(qemu_in_coroutine());
+    if (lock->writer) {
+        lock->writer = false;
+        qemu_co_queue_restart_all(&lock->queue);
+    } else {
+        lock->reader--;
+        assert(lock->reader >= 0);
+        /* Wakeup only one waiting writer */
+        if (!lock->reader) {
+            qemu_co_queue_next(&lock->queue);
+        }
+    }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+    while (lock->writer || lock->reader) {
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->writer = true;
+}
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
new file mode 100644
index 0000000000..b35db56356
--- /dev/null
+++ b/util/qemu-coroutine-sleep.c
@@ -0,0 +1,41 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/coroutine.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+typedef struct CoSleepCB {
+    QEMUTimer *ts;
+    Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+    CoSleepCB *sleep_cb = opaque;
+
+    qemu_coroutine_enter(sleep_cb->co, NULL);
+}
+
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+                                  int64_t ns)
+{
+    CoSleepCB sleep_cb = {
+        .co = qemu_coroutine_self(),
+    };
+    sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
+    timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
+    qemu_coroutine_yield();
+    timer_del(sleep_cb.ts);
+    timer_free(sleep_cb.ts);
+}
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
new file mode 100644
index 0000000000..9a04f28865
--- /dev/null
+++ b/util/qemu-coroutine.c
@@ -0,0 +1,150 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+
+enum {
+#if defined(_WIN32) && !defined(_WIN64)
+    POOL_BATCH_SIZE = 32,
+#else
+    POOL_BATCH_SIZE = 64,
+#endif
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+    Coroutine *co = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        co = QSLIST_FIRST(&alloc_pool);
+        if (!co) {
+            if (release_pool_size > POOL_BATCH_SIZE) {
+                /* Slow path; a good place to register the destructor, too.  */
+                if (!coroutine_pool_cleanup_notifier.notify) {
+                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                }
+
+                /* This is not exact; there could be a little skew between
+                 * release_pool_size and the actual size of release_pool.  But
+                 * it is just a heuristic, it does not need to be perfect.
+                 */
+                alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+                co = QSLIST_FIRST(&alloc_pool);
+            }
+        }
+        if (co) {
+            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+            alloc_pool_size--;
+        }
+    }
+
+    if (!co) {
+        co = qemu_coroutine_new();
+    }
+
+    co->entry = entry;
+    QTAILQ_INIT(&co->co_queue_wakeup);
+    return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+    co->caller = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+            atomic_inc(&release_pool_size);
+            return;
+        }
+        if (alloc_pool_size < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+            alloc_pool_size++;
+            return;
+        }
+    }
+
+    qemu_coroutine_delete(co);
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+    Coroutine *self = qemu_coroutine_self();
+    CoroutineAction ret;
+
+    trace_qemu_coroutine_enter(self, co, opaque);
+
+    if (co->caller) {
+        fprintf(stderr, "Co-routine re-entered recursively\n");
+        abort();
+    }
+
+    co->caller = self;
+    co->entry_arg = opaque;
+    ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+
+    qemu_co_queue_run_restart(co);
+
+    switch (ret) {
+    case COROUTINE_YIELD:
+        return;
+    case COROUTINE_TERMINATE:
+        trace_qemu_coroutine_terminate(co);
+        coroutine_delete(co);
+        return;
+    default:
+        abort();
+    }
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *to = self->caller;
+
+    trace_qemu_coroutine_yield(self, to);
+
+    if (!to) {
+        fprintf(stderr, "Co-routine is yielding to no one\n");
+        abort();
+    }
+
+    self->caller = NULL;
+    qemu_coroutine_switch(self, to, COROUTINE_YIELD);
+}
diff --git a/util/qemu-error.c b/util/qemu-error.c
index 050d5804c8..d9cc6337c0 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -238,7 +238,7 @@ void error_vreport(const char *fmt, va_list ap)
     GTimeVal tv;
     gchar *timestr;
 
-    if (enable_timestamp_msg) {
+    if (enable_timestamp_msg && !cur_mon) {
         g_get_current_time(&tv);
         timestr = g_time_val_to_iso8601(&tv);
         error_printf("%s ", timestr);
diff --git a/util/qemu-option.c b/util/qemu-option.c
index efe9d279c4..a50eceae4a 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -180,6 +180,11 @@ void parse_option_size(const char *name, const char *value,
 
     if (value != NULL) {
         sizef = strtod(value, &postfix);
+        if (sizef < 0 || sizef > UINT64_MAX) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+                             "a non-negative number below 2^64");
+            return;
+        }
         switch (*postfix) {
         case 'T':
             sizef *= 1024;
@@ -200,10 +205,8 @@ void parse_option_size(const char *name, const char *value,
             break;
         default:
             error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
-#if 0 /* conversion from qerror_report() to error_set() broke this: */
-            error_printf_unless_qmp("You may use k, M, G or T suffixes for "
-                    "kilobytes, megabytes, gigabytes and terabytes.\n");
-#endif
+            error_append_hint(errp, "You may use k, M, G or T suffixes for "
+                    "kilobytes, megabytes, gigabytes and terabytes.");
             return;
         }
     } else {
@@ -643,9 +646,8 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
         if (!id_wellformed(id)) {
             error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
                        "an identifier");
-#if 0 /* conversion from qerror_report() to error_set() broke this: */
-            error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n");
-#endif
+            error_append_hint(errp, "Identifiers consist of letters, digits, "
+                              "'-', '.', '_', starting with a letter.");
             return NULL;
         }
         opts = qemu_opts_find(list, id);
@@ -730,14 +732,35 @@ void qemu_opts_del(QemuOpts *opts)
     g_free(opts);
 }
 
-void qemu_opts_print(QemuOpts *opts, const char *sep)
+/* print value, escaping any commas in value */
+static void escaped_print(const char *value)
+{
+    const char *ptr;
+
+    for (ptr = value; *ptr; ++ptr) {
+        if (*ptr == ',') {
+            putchar(',');
+        }
+        putchar(*ptr);
+    }
+}
+
+void qemu_opts_print(QemuOpts *opts, const char *separator)
 {
     QemuOpt *opt;
     QemuOptDesc *desc = opts->list->desc;
+    const char *sep = "";
+
+    if (opts->id) {
+        printf("id=%s", opts->id); /* passed id_wellformed -> no commas */
+        sep = separator;
+    }
 
     if (desc[0].name == NULL) {
         QTAILQ_FOREACH(opt, &opts->head, next) {
-            printf("%s%s=\"%s\"", sep, opt->name, opt->str);
+            printf("%s%s=", sep, opt->name);
+            escaped_print(opt->str);
+            sep = separator;
         }
         return;
     }
@@ -750,13 +773,15 @@ void qemu_opts_print(QemuOpts *opts, const char *sep)
             continue;
         }
         if (desc->type == QEMU_OPT_STRING) {
-            printf("%s%s='%s'", sep, desc->name, value);
+            printf("%s%s=", sep, desc->name);
+            escaped_print(value);
         } else if ((desc->type == QEMU_OPT_SIZE ||
                     desc->type == QEMU_OPT_NUMBER) && opt) {
             printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint);
         } else {
             printf("%s%s=%s", sep, desc->name, value);
         }
+        sep = separator;
     }
 }
 
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index a3d0be6d28..a96a8d7960 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -25,6 +25,9 @@
 #include "monitor/monitor.h"
 #include "qemu/sockets.h"
 #include "qemu/main-loop.h"
+#include "qapi/qmp-input-visitor.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qapi-visit.h"
 
 #ifndef AI_ADDRCONFIG
 # define AI_ADDRCONFIG 0
@@ -125,12 +128,15 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
     ai.ai_family = PF_UNSPEC;
     ai.ai_socktype = SOCK_STREAM;
 
-    if ((qemu_opt_get(opts, "host") == NULL) ||
-        (qemu_opt_get(opts, "port") == NULL)) {
-        error_setg(errp, "host and/or port not specified");
+    if ((qemu_opt_get(opts, "host") == NULL)) {
+        error_setg(errp, "host not specified");
         return -1;
     }
-    pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+    if (qemu_opt_get(opts, "port") != NULL) {
+        pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+    } else {
+        port[0] = '\0';
+    }
     addr = qemu_opt_get(opts, "host");
 
     to = qemu_opt_get_number(opts, "to", 0);
@@ -142,6 +148,10 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
     /* lookup */
     if (port_offset) {
         unsigned long long baseport;
+        if (strlen(port) == 0) {
+            error_setg(errp, "port not specified");
+            return -1;
+        }
         if (parse_uint_full(port, &baseport, 10) < 0) {
             error_setg(errp, "can't convert to a number: %s", port);
             return -1;
@@ -153,7 +163,8 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
         }
         snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
     }
-    rc = getaddrinfo(strlen(addr) ? addr : NULL, port, &ai, &res);
+    rc = getaddrinfo(strlen(addr) ? addr : NULL,
+                     strlen(port) ? port : NULL, &ai, &res);
     if (rc != 0) {
         error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
                    gai_strerror(rc));
@@ -594,12 +605,15 @@ fail:
 
 static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr)
 {
-    bool ipv4 = addr->ipv4 || !addr->has_ipv4;
-    bool ipv6 = addr->ipv6 || !addr->has_ipv6;
+    bool ipv4 = addr->has_ipv4 && addr->ipv4;
+    bool ipv6 = addr->has_ipv6 && addr->ipv6;
 
-    if (!ipv4 || !ipv6) {
+    if (ipv4 || ipv6) {
         qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort);
         qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort);
+    } else if (addr->has_ipv4 || addr->has_ipv6) {
+        qemu_opt_set_bool(opts, "ipv4", !addr->has_ipv4, &error_abort);
+        qemu_opt_set_bool(opts, "ipv6", !addr->has_ipv6, &error_abort);
     }
     if (addr->has_to) {
         qemu_opt_set_number(opts, "to", addr->to, &error_abort);
@@ -745,8 +759,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
         qemu_opt_set(opts, "path", un.sun_path, &error_abort);
     }
 
-    if ((access(un.sun_path, F_OK) == 0) &&
-        unlink(un.sun_path) < 0) {
+    if (unlink(un.sun_path) < 0 && errno != ENOENT) {
         error_setg_errno(errp, errno,
                          "Failed to unlink socket %s", un.sun_path);
         goto err;
@@ -912,23 +925,23 @@ SocketAddress *socket_parse(const char *str, Error **errp)
             error_setg(errp, "invalid Unix socket address");
             goto fail;
         } else {
-            addr->kind = SOCKET_ADDRESS_KIND_UNIX;
-            addr->q_unix = g_new(UnixSocketAddress, 1);
-            addr->q_unix->path = g_strdup(str + 5);
+            addr->type = SOCKET_ADDRESS_KIND_UNIX;
+            addr->u.q_unix = g_new(UnixSocketAddress, 1);
+            addr->u.q_unix->path = g_strdup(str + 5);
         }
     } else if (strstart(str, "fd:", NULL)) {
         if (str[3] == '\0') {
             error_setg(errp, "invalid file descriptor address");
             goto fail;
         } else {
-            addr->kind = SOCKET_ADDRESS_KIND_FD;
-            addr->fd = g_new(String, 1);
-            addr->fd->str = g_strdup(str + 3);
+            addr->type = SOCKET_ADDRESS_KIND_FD;
+            addr->u.fd = g_new(String, 1);
+            addr->u.fd->str = g_strdup(str + 3);
         }
     } else {
-        addr->kind = SOCKET_ADDRESS_KIND_INET;
-        addr->inet = inet_parse(str, errp);
-        if (addr->inet == NULL) {
+        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr->u.inet = inet_parse(str, errp);
+        if (addr->u.inet == NULL) {
             goto fail;
         }
     }
@@ -946,19 +959,19 @@ int socket_connect(SocketAddress *addr, Error **errp,
     int fd;
 
     opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
-    switch (addr->kind) {
+    switch (addr->type) {
     case SOCKET_ADDRESS_KIND_INET:
-        inet_addr_to_opts(opts, addr->inet);
+        inet_addr_to_opts(opts, addr->u.inet);
         fd = inet_connect_opts(opts, errp, callback, opaque);
         break;
 
     case SOCKET_ADDRESS_KIND_UNIX:
-        qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort);
+        qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
         fd = unix_connect_opts(opts, errp, callback, opaque);
         break;
 
     case SOCKET_ADDRESS_KIND_FD:
-        fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+        fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
         if (fd >= 0 && callback) {
             qemu_set_nonblock(fd);
             callback(fd, NULL, opaque);
@@ -978,19 +991,19 @@ int socket_listen(SocketAddress *addr, Error **errp)
     int fd;
 
     opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
-    switch (addr->kind) {
+    switch (addr->type) {
     case SOCKET_ADDRESS_KIND_INET:
-        inet_addr_to_opts(opts, addr->inet);
+        inet_addr_to_opts(opts, addr->u.inet);
         fd = inet_listen_opts(opts, 0, errp);
         break;
 
     case SOCKET_ADDRESS_KIND_UNIX:
-        qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort);
+        qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
         fd = unix_listen_opts(opts, errp);
         break;
 
     case SOCKET_ADDRESS_KIND_FD:
-        fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+        fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
         break;
 
     default:
@@ -1006,12 +1019,12 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
     int fd;
 
     opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
-    switch (remote->kind) {
+    switch (remote->type) {
     case SOCKET_ADDRESS_KIND_INET:
-        inet_addr_to_opts(opts, remote->inet);
+        inet_addr_to_opts(opts, remote->u.inet);
         if (local) {
-            qemu_opt_set(opts, "localaddr", local->inet->host, &error_abort);
-            qemu_opt_set(opts, "localport", local->inet->port, &error_abort);
+            qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort);
+            qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort);
         }
         fd = inet_dgram_opts(opts, errp);
         break;
@@ -1023,3 +1036,140 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
     qemu_opts_del(opts);
     return fd;
 }
+
+
+static SocketAddress *
+socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    char host[NI_MAXHOST];
+    char serv[NI_MAXSERV];
+    SocketAddress *addr;
+    int ret;
+
+    ret = getnameinfo((struct sockaddr *)sa, salen,
+                      host, sizeof(host),
+                      serv, sizeof(serv),
+                      NI_NUMERICHOST | NI_NUMERICSERV);
+    if (ret != 0) {
+        error_setg(errp, "Cannot format numeric socket address: %s",
+                   gai_strerror(ret));
+        return NULL;
+    }
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_KIND_INET;
+    addr->u.inet = g_new0(InetSocketAddress, 1);
+    addr->u.inet->host = g_strdup(host);
+    addr->u.inet->port = g_strdup(serv);
+    if (sa->ss_family == AF_INET) {
+        addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true;
+    } else {
+        addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true;
+    }
+
+    return addr;
+}
+
+
+#ifndef WIN32
+static SocketAddress *
+socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    SocketAddress *addr;
+    struct sockaddr_un *su = (struct sockaddr_un *)sa;
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_KIND_UNIX;
+    addr->u.q_unix = g_new0(UnixSocketAddress, 1);
+    if (su->sun_path[0]) {
+        addr->u.q_unix->path = g_strndup(su->sun_path,
+                                         sizeof(su->sun_path));
+    }
+
+    return addr;
+}
+#endif /* WIN32 */
+
+static SocketAddress *
+socket_sockaddr_to_address(struct sockaddr_storage *sa,
+                           socklen_t salen,
+                           Error **errp)
+{
+    switch (sa->ss_family) {
+    case AF_INET:
+    case AF_INET6:
+        return socket_sockaddr_to_address_inet(sa, salen, errp);
+
+#ifndef WIN32
+    case AF_UNIX:
+        return socket_sockaddr_to_address_unix(sa, salen, errp);
+#endif /* WIN32 */
+
+    default:
+        error_setg(errp, "socket family %d unsupported",
+                   sa->ss_family);
+        return NULL;
+    }
+    return 0;
+}
+
+
+SocketAddress *socket_local_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         "Unable to query local socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_remote_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         "Unable to query remote socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+void qapi_copy_SocketAddress(SocketAddress **p_dest,
+                             SocketAddress *src)
+{
+    QmpOutputVisitor *qov;
+    QmpInputVisitor *qiv;
+    Visitor *ov, *iv;
+    QObject *obj;
+
+    *p_dest = NULL;
+
+    qov = qmp_output_visitor_new();
+    ov = qmp_output_get_visitor(qov);
+    visit_type_SocketAddress(ov, &src, NULL, &error_abort);
+    obj = qmp_output_get_qobject(qov);
+    qmp_output_visitor_cleanup(qov);
+    if (!obj) {
+        return;
+    }
+
+    qiv = qmp_input_visitor_new(obj);
+    iv = qmp_input_get_visitor(qiv);
+    visit_type_SocketAddress(iv, p_dest, NULL, &error_abort);
+    qmp_input_visitor_cleanup(qiv);
+    qobject_decref(obj);
+}
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 23f93b73b1..dde2a213cd 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -298,7 +298,16 @@ static inline void futex_wake(QemuEvent *ev, int n)
 
 static inline void futex_wait(QemuEvent *ev, unsigned val)
 {
-    futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+    while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+        switch (errno) {
+        case EWOULDBLOCK:
+            return;
+        case EINTR:
+            break; /* get out of switch and retry */
+        default:
+            abort();
+        }
+    }
 }
 #else
 static inline void futex_wake(QemuEvent *ev, int n)
@@ -389,7 +398,7 @@ void qemu_event_wait(QemuEvent *ev)
             /*
              * Leave the event reset and tell qemu_event_set that there
              * are waiters.  No need to retry, because there cannot be
-             * a concurent busy->free transition.  After the CAS, the
+             * a concurrent busy->free transition.  After the CAS, the
              * event will be either set or busy.
              */
             if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 406b52f91d..6cdd553e9a 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -238,10 +238,34 @@ void qemu_sem_wait(QemuSemaphore *sem)
     }
 }
 
+/* Wrap a Win32 manual-reset event with a fast userspace path.  The idea
+ * is to reset the Win32 event lazily, as part of a test-reset-test-wait
+ * sequence.  Such a sequence is, indeed, how QemuEvents are used by
+ * RCU and other subsystems!
+ *
+ * Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy (and is faster than cmpxchg).
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
 void qemu_event_init(QemuEvent *ev, bool init)
 {
     /* Manual reset.  */
-    ev->event = CreateEvent(NULL, TRUE, init, NULL);
+    ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
+    ev->value = (init ? EV_SET : EV_FREE);
 }
 
 void qemu_event_destroy(QemuEvent *ev)
@@ -251,17 +275,51 @@ void qemu_event_destroy(QemuEvent *ev)
 
 void qemu_event_set(QemuEvent *ev)
 {
-    SetEvent(ev->event);
+    if (atomic_mb_read(&ev->value) != EV_SET) {
+        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            SetEvent(ev->event);
+        }
+    }
 }
 
 void qemu_event_reset(QemuEvent *ev)
 {
-    ResetEvent(ev->event);
+    if (atomic_mb_read(&ev->value) == EV_SET) {
+        /* If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        atomic_or(&ev->value, EV_FREE);
+    }
 }
 
 void qemu_event_wait(QemuEvent *ev)
 {
-    WaitForSingleObject(ev->event, INFINITE);
+    unsigned value;
+
+    value = atomic_mb_read(&ev->value);
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /* qemu_event_set is not yet going to call SetEvent, but we are
+             * going to do another check for EV_SET below when setting EV_BUSY.
+             * At that point it is safe to call WaitForSingleObject.
+             */
+            ResetEvent(ev->event);
+
+            /* Tell qemu_event_set that there are waiters.  No need to retry
+             * because there cannot be a concurent busy->free transition.
+             * After the CAS, the event will be either set or busy.
+             */
+            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                value = EV_SET;
+            } else {
+                value = EV_BUSY;
+            }
+        }
+        if (value == EV_BUSY) {
+            WaitForSingleObject(ev->event, INFINITE);
+        }
+    }
 }
 
 struct QemuThreadData {
diff --git a/util/rcu.c b/util/rcu.c
index cdcad678b4..8ba304dc44 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -47,7 +47,8 @@
 unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
 
 QemuEvent rcu_gp_event;
-static QemuMutex rcu_gp_lock;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
 
 /*
  * Check whether a quiescent state was crossed between the beginning of
@@ -66,7 +67,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
  */
 __thread struct rcu_reader_data rcu_reader;
 
-/* Protected by rcu_gp_lock.  */
+/* Protected by rcu_registry_lock.  */
 typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
 static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
 
@@ -114,10 +115,26 @@ static void wait_for_readers(void)
             break;
         }
 
-        /* Wait for one thread to report a quiescent state and
-         * try again.
+        /* Wait for one thread to report a quiescent state and try again.
+         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+         * wait too much time.
+         *
+         * rcu_register_thread() may add nodes to &registry; it will not
+         * wake up synchronize_rcu, but that is okay because at least another
+         * thread must exit its RCU read-side critical section before
+         * synchronize_rcu is done.  The next iteration of the loop will
+         * move the new thread's rcu_reader from &registry to &qsreaders,
+         * because rcu_gp_ongoing() will return false.
+         *
+         * rcu_unregister_thread() may remove nodes from &qsreaders instead
+         * of &registry if it runs during qemu_event_wait.  That's okay;
+         * the node then will not be added back to &registry by QLIST_SWAP
+         * below.  The invariant is that the node is part of one list when
+         * rcu_registry_lock is released.
          */
+        qemu_mutex_unlock(&rcu_registry_lock);
         qemu_event_wait(&rcu_gp_event);
+        qemu_mutex_lock(&rcu_registry_lock);
     }
 
     /* put back the reader list in the registry */
@@ -126,7 +143,8 @@ static void wait_for_readers(void)
 
 void synchronize_rcu(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 
     if (!QLIST_EMPTY(&registry)) {
         /* In either case, the atomic_mb_set below blocks stores that free
@@ -149,7 +167,8 @@ void synchronize_rcu(void)
         wait_for_readers();
     }
 
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 
 
@@ -273,23 +292,24 @@ void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
 void rcu_register_thread(void)
 {
     assert(rcu_reader.ctr == 0);
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 void rcu_unregister_thread(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
     QLIST_REMOVE(&rcu_reader, node);
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
 }
 
 static void rcu_init_complete(void)
 {
     QemuThread thread;
 
-    qemu_mutex_init(&rcu_gp_lock);
+    qemu_mutex_init(&rcu_registry_lock);
+    qemu_mutex_init(&rcu_sync_lock);
     qemu_event_init(&rcu_gp_event, true);
 
     qemu_event_init(&rcu_call_ready_event, false);
@@ -306,12 +326,14 @@ static void rcu_init_complete(void)
 #ifdef CONFIG_POSIX
 static void rcu_init_lock(void)
 {
-    qemu_mutex_lock(&rcu_gp_lock);
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
 }
 
 static void rcu_init_unlock(void)
 {
-    qemu_mutex_unlock(&rcu_gp_lock);
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
 }
 #endif
 
diff --git a/util/throttle.c b/util/throttle.c
index 706c13111e..1113671ecf 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -300,6 +300,21 @@ bool throttle_is_valid(ThrottleConfig *cfg)
     return !invalid;
 }
 
+/* check if bps_max/iops_max is used without bps/iops
+ * @cfg: the throttling configuration to inspect
+ */
+bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
+{
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /* fix bucket parameters */
 static void throttle_fix_bucket(LeakyBucket *bkt)
 {
diff --git a/util/timed-average.c b/util/timed-average.c
new file mode 100644
index 0000000000..a2dfb4834d
--- /dev/null
+++ b/util/timed-average.c
@@ -0,0 +1,231 @@
+/*
+ * QEMU timed average computation
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ *   Benoît Canet <benoit.canet@nodalink.com>
+ *   Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free sofware: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Sofware Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+#include "qemu/timed-average.h"
+
+/* This module computes an average of a set of values within a time
+ * window.
+ *
+ * Algorithm:
+ *
+ * - Create two windows with a certain expiration period, and
+ *   offsetted by period / 2.
+ * - Each time you want to account a new value, do it in both windows.
+ * - The minimum / maximum / average values are always returned from
+ *   the oldest window.
+ *
+ * Example:
+ *
+ *        t=0          |t=0.5           |t=1          |t=1.5            |t=2
+ *        wnd0: [0,0.5)|wnd0: [0.5,1.5) |             |wnd0: [1.5,2.5)  |
+ *        wnd1: [0,1)  |                |wnd1: [1,2)  |                 |
+ *
+ * Values are returned from:
+ *
+ *        wnd0---------|wnd1------------|wnd0---------|wnd1-------------|
+ */
+
+/* Update the expiration of a time window
+ *
+ * @w:      the window used
+ * @now:    the current time in nanoseconds
+ * @period: the expiration period in nanoseconds
+ */
+static void update_expiration(TimedAverageWindow *w, int64_t now,
+                              int64_t period)
+{
+    /* time elapsed since the last theoretical expiration */
+    int64_t elapsed = (now - w->expiration) % period;
+    /* time remaininging until the next expiration */
+    int64_t remaining = period - elapsed;
+    /* compute expiration */
+    w->expiration = now + remaining;
+}
+
+/* Reset a window
+ *
+ * @w: the window to reset
+ */
+static void window_reset(TimedAverageWindow *w)
+{
+    w->min = UINT64_MAX;
+    w->max = 0;
+    w->sum = 0;
+    w->count = 0;
+}
+
+/* Get the current window (that is, the one with the earliest
+ * expiration time).
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: a pointer to the current window
+ */
+static TimedAverageWindow *current_window(TimedAverage *ta)
+{
+     return &ta->windows[ta->current];
+}
+
+/* Initialize a TimedAverage structure
+ *
+ * @ta:         the TimedAverage structure
+ * @clock_type: the type of clock to use
+ * @period:     the time window period in nanoseconds
+ */
+void timed_average_init(TimedAverage *ta, QEMUClockType clock_type,
+                        uint64_t period)
+{
+    int64_t now = qemu_clock_get_ns(clock_type);
+
+    /* Returned values are from the oldest window, so they belong to
+     * the interval [ta->period/2,ta->period). By adjusting the
+     * requested period by 4/3, we guarantee that they're in the
+     * interval [2/3 period,4/3 period), closer to the requested
+     * period on average */
+    ta->period = (uint64_t) period * 4 / 3;
+    ta->clock_type = clock_type;
+    ta->current = 0;
+
+    window_reset(&ta->windows[0]);
+    window_reset(&ta->windows[1]);
+
+    /* Both windows are offsetted by half a period */
+    ta->windows[0].expiration = now + ta->period / 2;
+    ta->windows[1].expiration = now + ta->period;
+}
+
+/* Check if the time windows have expired, updating their counters and
+ * expiration time if that's the case.
+ *
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) within the current
+ *           window will be stored here
+ */
+static void check_expirations(TimedAverage *ta, uint64_t *elapsed)
+{
+    int64_t now = qemu_clock_get_ns(ta->clock_type);
+    int i;
+
+    assert(ta->period != 0);
+
+    /* Check if the windows have expired */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+        if (w->expiration <= now) {
+            window_reset(w);
+            update_expiration(w, now, ta->period);
+        }
+    }
+
+    /* Make ta->current point to the oldest window */
+    if (ta->windows[0].expiration < ta->windows[1].expiration) {
+        ta->current = 0;
+    } else {
+        ta->current = 1;
+    }
+
+    /* Calculate the elapsed time within the current window */
+    if (elapsed) {
+        int64_t remaining = ta->windows[ta->current].expiration - now;
+        *elapsed = ta->period - remaining;
+    }
+}
+
+/* Account a value
+ *
+ * @ta:    the TimedAverage structure
+ * @value: the value to account
+ */
+void timed_average_account(TimedAverage *ta, uint64_t value)
+{
+    int i;
+    check_expirations(ta, NULL);
+
+    /* Do the accounting in both windows at the same time */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+
+        w->sum += value;
+        w->count++;
+
+        if (value < w->min) {
+            w->min = value;
+        }
+
+        if (value > w->max) {
+            w->max = value;
+        }
+    }
+}
+
+/* Get the minimum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the minimum value
+ */
+uint64_t timed_average_min(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->min < UINT64_MAX ? w->min : 0;
+}
+
+/* Get the average value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the average value
+ */
+uint64_t timed_average_avg(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->count > 0 ? w->sum / w->count : 0;
+}
+
+/* Get the maximum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the maximum value
+ */
+uint64_t timed_average_max(TimedAverage *ta)
+{
+    check_expirations(ta, NULL);
+    return current_window(ta)->max;
+}
+
+/* Get the sum of all accounted values
+ * @ta:      the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
+ * @ret:     the sum of all accounted values
+ */
+uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, elapsed);
+    w = current_window(ta);
+    return w->sum;
+}