summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/Makefile.objs18
-rw-r--r--util/bitmap.c2
-rw-r--r--util/buffer.c171
-rw-r--r--util/coroutine-gthread.c198
-rw-r--r--util/coroutine-sigaltstack.c293
-rw-r--r--util/coroutine-ucontext.c194
-rw-r--r--util/coroutine-win32.c104
-rw-r--r--util/cutils.c203
-rw-r--r--util/error.c180
-rw-r--r--util/event_notifier-posix.c2
-rw-r--r--util/id.c37
-rw-r--r--util/memfd.c162
-rw-r--r--util/mmap-alloc.c110
-rw-r--r--util/osdep.c17
-rw-r--r--util/oslib-posix.c115
-rw-r--r--util/oslib-win32.c17
-rw-r--r--util/qemu-coroutine-io.c91
-rw-r--r--util/qemu-coroutine-lock.c186
-rw-r--r--util/qemu-coroutine-sleep.c41
-rw-r--r--util/qemu-coroutine.c150
-rw-r--r--util/qemu-error.c2
-rw-r--r--util/qemu-option.c45
-rw-r--r--util/qemu-sockets.c212
-rw-r--r--util/qemu-thread-posix.c13
-rw-r--r--util/qemu-thread-win32.c66
-rw-r--r--util/rcu.c48
-rw-r--r--util/throttle.c15
-rw-r--r--util/timed-average.c231
28 files changed, 2709 insertions, 214 deletions
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 114d6578c4..89dd80ef86 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -1,13 +1,20 @@
util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
-util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
-util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += compatfd.o
+util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += oslib-posix.o
+util-obj-$(CONFIG_POSIX) += qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
+util-obj-$(CONFIG_POSIX) += memfd.o
+util-obj-$(CONFIG_WIN32) += oslib-win32.o
+util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
util-obj-y += envlist.o path.o module.o
util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
util-obj-y += bitmap.o bitops.o hbitmap.o
util-obj-y += fifo8.o
util-obj-y += acl.o
util-obj-y += error.o qemu-error.o
-util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-y += id.o
util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
util-obj-y += qemu-option.o qemu-progress.o
@@ -18,3 +25,8 @@ util-obj-y += getauxval.o
util-obj-y += readline.o
util-obj-y += rfifolock.o
util-obj-y += rcu.o
+util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+util-obj-y += qemu-coroutine-sleep.o
+util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
+util-obj-y += buffer.o
+util-obj-y += timed-average.o
diff --git a/util/bitmap.c b/util/bitmap.c
index 300a68e38c..44f0f481be 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -14,7 +14,7 @@
#include "qemu/atomic.h"
/*
- * bitmaps provide an array of bits, implemented using an an
+ * bitmaps provide an array of bits, implemented using an
* array of unsigned longs. The number of valid bits in a
* given bitmap does _not_ need to be an exact multiple of
* BITS_PER_LONG.
diff --git a/util/buffer.c b/util/buffer.c
new file mode 100644
index 0000000000..8b27c08aac
--- /dev/null
+++ b/util/buffer.c
@@ -0,0 +1,171 @@
+/*
+ * QEMU generic buffers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/buffer.h"
+#include "trace.h"
+
+#define BUFFER_MIN_INIT_SIZE 4096
+#define BUFFER_MIN_SHRINK_SIZE 65536
+
+/* define the factor alpha for the expentional smoothing
+ * that is used in the average size calculation. a shift
+ * of 7 results in an alpha of 1/2^7. */
+#define BUFFER_AVG_SIZE_SHIFT 7
+
+static size_t buffer_req_size(Buffer *buffer, size_t len)
+{
+ return MAX(BUFFER_MIN_INIT_SIZE,
+ pow2ceil(buffer->offset + len));
+}
+
+static void buffer_adj_size(Buffer *buffer, size_t len)
+{
+ size_t old = buffer->capacity;
+ buffer->capacity = buffer_req_size(buffer, len);
+ buffer->buffer = g_realloc(buffer->buffer, buffer->capacity);
+ trace_buffer_resize(buffer->name ?: "unnamed",
+ old, buffer->capacity);
+
+ /* make it even harder for the buffer to shrink, reset average size
+ * to currenty capacity if it is larger than the average. */
+ buffer->avg_size = MAX(buffer->avg_size,
+ buffer->capacity << BUFFER_AVG_SIZE_SHIFT);
+}
+
+void buffer_init(Buffer *buffer, const char *name, ...)
+{
+ va_list ap;
+
+ va_start(ap, name);
+ buffer->name = g_strdup_vprintf(name, ap);
+ va_end(ap);
+}
+
+static uint64_t buffer_get_avg_size(Buffer *buffer)
+{
+ return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT;
+}
+
+void buffer_shrink(Buffer *buffer)
+{
+ size_t new;
+
+ /* Calculate the average size of the buffer as
+ * avg_size = avg_size * ( 1 - a ) + required_size * a
+ * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */
+ buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1;
+ buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT;
+ buffer->avg_size += buffer_req_size(buffer, 0);
+
+ /* And then only shrink if the average size of the buffer is much
+ * too big, to avoid bumping up & down the buffers all the time.
+ * realloc() isn't exactly cheap ... */
+ new = buffer_req_size(buffer, buffer_get_avg_size(buffer));
+ if (new < buffer->capacity >> 3 &&
+ new >= BUFFER_MIN_SHRINK_SIZE) {
+ buffer_adj_size(buffer, buffer_get_avg_size(buffer));
+ }
+
+ buffer_adj_size(buffer, 0);
+}
+
+void buffer_reserve(Buffer *buffer, size_t len)
+{
+ if ((buffer->capacity - buffer->offset) < len) {
+ buffer_adj_size(buffer, len);
+ }
+}
+
+gboolean buffer_empty(Buffer *buffer)
+{
+ return buffer->offset == 0;
+}
+
+uint8_t *buffer_end(Buffer *buffer)
+{
+ return buffer->buffer + buffer->offset;
+}
+
+void buffer_reset(Buffer *buffer)
+{
+ buffer->offset = 0;
+ buffer_shrink(buffer);
+}
+
+void buffer_free(Buffer *buffer)
+{
+ trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity);
+ g_free(buffer->buffer);
+ g_free(buffer->name);
+ buffer->offset = 0;
+ buffer->capacity = 0;
+ buffer->buffer = NULL;
+ buffer->name = NULL;
+}
+
+void buffer_append(Buffer *buffer, const void *data, size_t len)
+{
+ memcpy(buffer->buffer + buffer->offset, data, len);
+ buffer->offset += len;
+}
+
+void buffer_advance(Buffer *buffer, size_t len)
+{
+ memmove(buffer->buffer, buffer->buffer + len,
+ (buffer->offset - len));
+ buffer->offset -= len;
+ buffer_shrink(buffer);
+}
+
+void buffer_move_empty(Buffer *to, Buffer *from)
+{
+ trace_buffer_move_empty(to->name ?: "unnamed",
+ from->offset,
+ from->name ?: "unnamed");
+ assert(to->offset == 0);
+
+ g_free(to->buffer);
+ to->offset = from->offset;
+ to->capacity = from->capacity;
+ to->buffer = from->buffer;
+
+ from->offset = 0;
+ from->capacity = 0;
+ from->buffer = NULL;
+}
+
+void buffer_move(Buffer *to, Buffer *from)
+{
+ if (to->offset == 0) {
+ buffer_move_empty(to, from);
+ return;
+ }
+
+ trace_buffer_move(to->name ?: "unnamed",
+ from->offset,
+ from->name ?: "unnamed");
+ buffer_reserve(to, from->offset);
+ buffer_append(to, from->buffer, from->offset);
+
+ g_free(from->buffer);
+ from->offset = 0;
+ from->capacity = 0;
+ from->buffer = NULL;
+}
diff --git a/util/coroutine-gthread.c b/util/coroutine-gthread.c
new file mode 100644
index 0000000000..0bcd77867d
--- /dev/null
+++ b/util/coroutine-gthread.c
@@ -0,0 +1,198 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+ Coroutine base;
+ GThread *thread;
+ bool runnable;
+ bool free_on_thread_exit;
+ CoroutineAction action;
+} CoroutineGThread;
+
+static CompatGMutex coroutine_lock;
+static CompatGCond coroutine_cond;
+
+/* GLib 2.31 and beyond deprecated various parts of the thread API,
+ * but the new interfaces are not available in older GLib versions
+ * so we have to cope with both.
+ */
+#if GLIB_CHECK_VERSION(2, 31, 0)
+/* Awkwardly, the GPrivate API doesn't provide a way to update the
+ * GDestroyNotify handler for the coroutine key dynamically. So instead
+ * we track whether or not the CoroutineGThread should be freed on
+ * thread exit / coroutine key update using the free_on_thread_exit
+ * field.
+ */
+static void coroutine_destroy_notify(gpointer data)
+{
+ CoroutineGThread *co = data;
+ if (co && co->free_on_thread_exit) {
+ g_free(co);
+ }
+}
+
+static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+ return g_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+ bool free_on_thread_exit)
+{
+ /* Unlike g_static_private_set() this does not call the GDestroyNotify
+ * if the previous value of the key was NULL. Fortunately we only need
+ * the GDestroyNotify in the non-NULL key case.
+ */
+ co->free_on_thread_exit = free_on_thread_exit;
+ g_private_replace(&coroutine_key, co);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+ return g_thread_new("coroutine", func, data);
+}
+
+#else
+
+/* Handle older GLib versions */
+
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+ return g_static_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+ bool free_on_thread_exit)
+{
+ g_static_private_set(&coroutine_key, co,
+ free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+ return g_thread_create_full(func, data, 0, TRUE, TRUE,
+ G_THREAD_PRIORITY_NORMAL, NULL);
+}
+
+#endif
+
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+ if (!g_thread_supported()) {
+ g_thread_init(NULL);
+ }
+#endif
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+ while (!co->runnable) {
+ g_cond_wait(&coroutine_cond, &coroutine_lock);
+ }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+ g_mutex_lock(&coroutine_lock);
+ coroutine_wait_runnable_locked(co);
+ g_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+ CoroutineGThread *co = opaque;
+
+ set_coroutine_key(co, false);
+ coroutine_wait_runnable(co);
+ co->base.entry(co->base.entry_arg);
+ qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+ return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ CoroutineGThread *co;
+
+ co = g_malloc0(sizeof(*co));
+ co->thread = create_thread(coroutine_thread, co);
+ if (!co->thread) {
+ g_free(co);
+ return NULL;
+ }
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+ g_thread_join(co->thread);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+ Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+ CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+ g_mutex_lock(&coroutine_lock);
+ from->runnable = false;
+ from->action = action;
+ to->runnable = true;
+ to->action = action;
+ g_cond_broadcast(&coroutine_cond);
+
+ if (action != COROUTINE_TERMINATE) {
+ coroutine_wait_runnable_locked(from);
+ }
+ g_mutex_unlock(&coroutine_lock);
+ return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineGThread *co = get_coroutine_key();
+ if (!co) {
+ co = g_malloc0(sizeof(*co));
+ co->runnable = true;
+ set_coroutine_key(co, true);
+ }
+
+ return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineGThread *co = get_coroutine_key();
+
+ return co && co->base.caller;
+}
diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c
new file mode 100644
index 0000000000..39842a4a90
--- /dev/null
+++ b/util/coroutine-sigaltstack.c
@@ -0,0 +1,293 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012 Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+
+ /** Information for the signal handler (trampoline) */
+ sigjmp_buf tr_reenter;
+ volatile sig_atomic_t tr_called;
+ void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = g_malloc0(sizeof(*s));
+ s->current = &s->leader.base;
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+ if (ret != 0) {
+ fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+ abort();
+ }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+ /* Initialize longjmp environment and switch back the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+ CoroutineUContext *self;
+ Coroutine *co;
+ CoroutineThreadState *coTS;
+
+ /* Get the thread specific information */
+ coTS = coroutine_get_thread_state();
+ self = coTS->tr_handler;
+ coTS->tr_called = 1;
+ co = &self->base;
+
+ /*
+ * Here we have to do a bit of a ping pong between the caller, given that
+ * this is a signal handler and we have to do a return "soon". Then the
+ * caller can reestablish everything and do a siglongjmp here again.
+ */
+ if (!sigsetjmp(coTS->tr_reenter, 0)) {
+ return;
+ }
+
+ /*
+ * Ok, the caller has siglongjmp'ed back to us, so now prepare
+ * us for the real machine state switching. We have to jump
+ * into another function here to get a new stack context for
+ * the auto variables (which have to be auto-variables
+ * because the start of the thread happens later). Else with
+ * PIC (i.e. Position Independent Code which is used when PTH
+ * is built as a shared library) most platforms would
+ * horrible core dump as experience showed.
+ */
+ coroutine_bootstrap(self, co);
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ CoroutineThreadState *coTS;
+ struct sigaction sa;
+ struct sigaction osa;
+ stack_t ss;
+ stack_t oss;
+ sigset_t sigs;
+ sigset_t osigs;
+ sigjmp_buf old_env;
+
+ /* The way to manipulate stack is with the sigaltstack function. We
+ * prepare a stack, with it delivering a signal to ourselves and then
+ * put sigsetjmp/siglongjmp where needed.
+ * This has been done keeping coroutine-ucontext as a model and with the
+ * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+ * of the coroutines and see pth_mctx.c (from the pth project) for the
+ * sigaltstack way of manipulating stacks.
+ */
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ coTS = coroutine_get_thread_state();
+ coTS->tr_handler = co;
+
+ /*
+ * Preserve the SIGUSR2 signal state, block SIGUSR2,
+ * and establish our signal handler. The signal will
+ * later transfer control onto the signal stack.
+ */
+ sigemptyset(&sigs);
+ sigaddset(&sigs, SIGUSR2);
+ pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+ sa.sa_handler = coroutine_trampoline;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_ONSTACK;
+ if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+ abort();
+ }
+
+ /*
+ * Set the new stack.
+ */
+ ss.ss_sp = co->stack;
+ ss.ss_size = stack_size;
+ ss.ss_flags = 0;
+ if (sigaltstack(&ss, &oss) < 0) {
+ abort();
+ }
+
+ /*
+ * Now transfer control onto the signal stack and set it up.
+ * It will return immediately via "return" after the sigsetjmp()
+ * was performed. Be careful here with race conditions. The
+ * signal can be delivered the first time sigsuspend() is
+ * called.
+ */
+ coTS->tr_called = 0;
+ pthread_kill(pthread_self(), SIGUSR2);
+ sigfillset(&sigs);
+ sigdelset(&sigs, SIGUSR2);
+ while (!coTS->tr_called) {
+ sigsuspend(&sigs);
+ }
+
+ /*
+ * Inform the system that we are back off the signal stack by
+ * removing the alternative signal stack. Be careful here: It
+ * first has to be disabled, before it can be removed.
+ */
+ sigaltstack(NULL, &ss);
+ ss.ss_flags = SS_DISABLE;
+ if (sigaltstack(&ss, NULL) < 0) {
+ abort();
+ }
+ sigaltstack(NULL, &ss);
+ if (!(oss.ss_flags & SS_DISABLE)) {
+ sigaltstack(&oss, NULL);
+ }
+
+ /*
+ * Restore the old SIGUSR2 signal handler and mask
+ */
+ sigaction(SIGUSR2, &osa, NULL);
+ pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+ /*
+ * Now enter the trampoline again, but this time not as a signal
+ * handler. Instead we jump into it directly. The functionally
+ * redundant ping-pong pointer arithmetic is necessary to avoid
+ * type-conversion warnings related to the `volatile' qualifier and
+ * the fact that `jmp_buf' usually is an array type.
+ */
+ if (!sigsetjmp(old_env, 0)) {
+ siglongjmp(coTS->tr_reenter, 1);
+ }
+
+ /*
+ * Ok, we returned again, so now we're finished
+ */
+
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
+
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
new file mode 100644
index 0000000000..26cbebb7a7
--- /dev/null
+++ b/util/coroutine-ucontext.c
@@ -0,0 +1,194 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+ unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ sigjmp_buf old_env;
+ union cc_arg arg = {0};
+
+ /* The ucontext functions preserve signal masks which incurs a
+ * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
+ * preserve signal masks but only works on the current stack.
+ * Since we need a way to create and switch to a new stack, use
+ * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+ * everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+ co->valgrind_stack_id =
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, siglongjmp() back out */
+ if (!sigsetjmp(old_env, 0)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+ VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+ valgrind_stack_deregister(co);
+#endif
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ int ret;
+
+ current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ if (!current) {
+ current = &leader.base;
+ }
+ return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ return current && current->caller;
+}
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
new file mode 100644
index 0000000000..990a3b3b0c
--- /dev/null
+++ b/util/coroutine-win32.c
@@ -0,0 +1,104 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct
+{
+ Coroutine base;
+
+ LPVOID fiber;
+ CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+ CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+ current = to_;
+
+ to->action = action;
+ SwitchToFiber(to->fiber);
+ return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+ Coroutine *co = co_;
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineWin32 *co;
+
+ co = g_malloc0(sizeof(*co));
+ co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+
+ g_assert(co->fiber);
+
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+ DeleteFiber(co->fiber);
+ g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ if (!current) {
+ current = &leader.base;
+ leader.fiber = ConvertThreadToFiber(NULL);
+ }
+ return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ return current && current->caller;
+}
diff --git a/util/cutils.c b/util/cutils.c
index 5d1c9ebe05..cfeb848d19 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -145,11 +145,6 @@ time_t mktimegm(struct tm *tm)
return t;
}
-int qemu_fls(int i)
-{
- return 32 - clz32(i);
-}
-
/*
* Make sure data goes on disk, but if possible do not bother to
* write out the inode just for timestamp updates.
@@ -281,19 +276,19 @@ int fcntl_setfl(int fd, int flag)
static int64_t suffix_mul(char suffix, int64_t unit)
{
switch (qemu_toupper(suffix)) {
- case STRTOSZ_DEFSUFFIX_B:
+ case QEMU_STRTOSZ_DEFSUFFIX_B:
return 1;
- case STRTOSZ_DEFSUFFIX_KB:
+ case QEMU_STRTOSZ_DEFSUFFIX_KB:
return unit;
- case STRTOSZ_DEFSUFFIX_MB:
+ case QEMU_STRTOSZ_DEFSUFFIX_MB:
return unit * unit;
- case STRTOSZ_DEFSUFFIX_GB:
+ case QEMU_STRTOSZ_DEFSUFFIX_GB:
return unit * unit * unit;
- case STRTOSZ_DEFSUFFIX_TB:
+ case QEMU_STRTOSZ_DEFSUFFIX_TB:
return unit * unit * unit * unit;
- case STRTOSZ_DEFSUFFIX_PB:
+ case QEMU_STRTOSZ_DEFSUFFIX_PB:
return unit * unit * unit * unit * unit;
- case STRTOSZ_DEFSUFFIX_EB:
+ case QEMU_STRTOSZ_DEFSUFFIX_EB:
return unit * unit * unit * unit * unit * unit;
}
return -1;
@@ -305,7 +300,7 @@ static int64_t suffix_mul(char suffix, int64_t unit)
* in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on
* other error.
*/
-int64_t strtosz_suffix_unit(const char *nptr, char **end,
+int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end,
const char default_suffix, int64_t unit)
{
int64_t retval = -EINVAL;
@@ -348,14 +343,165 @@ fail:
return retval;
}
-int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix)
+int64_t qemu_strtosz_suffix(const char *nptr, char **end,
+ const char default_suffix)
{
- return strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+ return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024);
}
-int64_t strtosz(const char *nptr, char **end)
+int64_t qemu_strtosz(const char *nptr, char **end)
+{
+ return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB);
+}
+
+/**
+ * Helper function for qemu_strto*l() functions.
+ */
+static int check_strtox_error(const char *p, char *endptr, const char **next,
+ int err)
+{
+ /* If no conversion was performed, prefer BSD behavior over glibc
+ * behavior.
+ */
+ if (err == 0 && endptr == p) {
+ err = EINVAL;
+ }
+ if (!next && *endptr) {
+ return -EINVAL;
+ }
+ if (next) {
+ *next = endptr;
+ }
+ return -err;
+}
+
+/**
+ * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions.
+ *
+ * Convert ASCII string @nptr to a long integer value
+ * from the given @base. Parameters @nptr, @endptr, @base
+ * follows same semantics as strtol() C function.
+ *
+ * Unlike from strtol() function, if @endptr is not NULL, this
+ * function will return -EINVAL whenever it cannot fully convert
+ * the string in @nptr with given @base to a long. This function returns
+ * the result of the conversion only through the @result parameter.
+ *
+ * If NULL is passed in @endptr, then the whole string in @ntpr
+ * is a number otherwise it returns -EINVAL.
+ *
+ * RETURN VALUE
+ * Unlike from strtol() function, this wrapper returns either
+ * -EINVAL or the errno set by strtol() function (e.g -ERANGE).
+ * If the conversion overflows, -ERANGE is returned, and @result
+ * is set to the max value of the desired type
+ * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case
+ * of underflow, -ERANGE is returned, and @result is set to the min
+ * value of the desired type. For strtol(), strtoll(), @result is set to
+ * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it
+ * is set to 0.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+ long *result)
{
- return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtol(nptr, &p, base);
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long integer.
+ *
+ * If string contains a negative number, value will be converted to
+ * the unsigned representation of the signed value, unless the original
+ * (nonnegated) value would overflow, in this case, it will set @result
+ * to ULONG_MAX, and return ERANGE.
+ *
+ * The same behavior holds, for qemu_strtoull() but sets @result to
+ * ULLONG_MAX instead of ULONG_MAX.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+ unsigned long *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoul(nptr, &p, base);
+ /* Windows returns 1 for negative out-of-range values. */
+ if (errno == ERANGE) {
+ *result = -1;
+ }
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * Converts ASCII string to a long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+ int64_t *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoll(nptr, &p, base);
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+ uint64_t *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoull(nptr, &p, base);
+ /* Windows returns 1 for negative out-of-range values. */
+ if (errno == ERANGE) {
+ *result = -1;
+ }
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
}
/**
@@ -474,29 +620,6 @@ int qemu_parse_fd(const char *param)
return fd;
}
-/* round down to the nearest power of 2*/
-int64_t pow2floor(int64_t value)
-{
- if (!is_power_of_2(value)) {
- value = 0x8000000000000000ULL >> clz64(value);
- }
- return value;
-}
-
-/* round up to the nearest power of 2 (0 if overflow) */
-uint64_t pow2ceil(uint64_t value)
-{
- uint8_t nlz = clz64(value);
-
- if (is_power_of_2(value)) {
- return value;
- }
- if (!nlz) {
- return 0;
- }
- return 1ULL << (64 - nlz);
-}
-
/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers
diff --git a/util/error.c b/util/error.c
index 14f4351879..80c89a2079 100644
--- a/util/error.c
+++ b/util/error.c
@@ -2,9 +2,11 @@
* QEMU Error Objects
*
* Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
+ * Markus Armbruster <armbru@redhat.com>,
*
* This work is licensed under the terms of the GNU LGPL, version 2. See
* the COPYING.LIB file in the top-level directory.
@@ -18,14 +20,33 @@ struct Error
{
char *msg;
ErrorClass err_class;
+ const char *src, *func;
+ int line;
+ GString *hint;
};
Error *error_abort;
+Error *error_fatal;
-void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
+static void error_handle_fatal(Error **errp, Error *err)
+{
+ if (errp == &error_abort) {
+ fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+ err->func, err->src, err->line);
+ error_report_err(err);
+ abort();
+ }
+ if (errp == &error_fatal) {
+ error_report_err(err);
+ exit(1);
+ }
+}
+
+static void error_setv(Error **errp,
+ const char *src, int line, const char *func,
+ ErrorClass err_class, const char *fmt, va_list ap)
{
Error *err;
- va_list ap;
int saved_errno = errno;
if (errp == NULL) {
@@ -34,99 +55,120 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
assert(*errp == NULL);
err = g_malloc0(sizeof(*err));
-
- va_start(ap, fmt);
err->msg = g_strdup_vprintf(fmt, ap);
- va_end(ap);
err->err_class = err_class;
+ err->src = src;
+ err->line = line;
+ err->func = func;
- if (errp == &error_abort) {
- error_report_err(err);
- abort();
- }
-
+ error_handle_fatal(errp, err);
*errp = err;
errno = saved_errno;
}
-void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
- const char *fmt, ...)
+void error_set_internal(Error **errp,
+ const char *src, int line, const char *func,
+ ErrorClass err_class, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, err_class, fmt, ap);
+ va_end(ap);
+}
+
+void error_setg_internal(Error **errp,
+ const char *src, int line, const char *func,
+ const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+ va_end(ap);
+}
+
+void error_setg_errno_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int os_errno, const char *fmt, ...)
{
- Error *err;
- char *msg1;
va_list ap;
+ char *msg;
int saved_errno = errno;
if (errp == NULL) {
return;
}
- assert(*errp == NULL);
-
- err = g_malloc0(sizeof(*err));
va_start(ap, fmt);
- msg1 = g_strdup_vprintf(fmt, ap);
- if (os_errno != 0) {
- err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
- g_free(msg1);
- } else {
- err->msg = msg1;
- }
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
va_end(ap);
- err->err_class = err_class;
- if (errp == &error_abort) {
- error_report_err(err);
- abort();
+ if (os_errno != 0) {
+ msg = (*errp)->msg;
+ (*errp)->msg = g_strdup_printf("%s: %s", msg, strerror(os_errno));
+ g_free(msg);
}
- *errp = err;
-
errno = saved_errno;
}
-void error_setg_file_open(Error **errp, int os_errno, const char *filename)
+void error_setg_file_open_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int os_errno, const char *filename)
{
- error_setg_errno(errp, os_errno, "Could not open '%s'", filename);
+ error_setg_errno_internal(errp, src, line, func, os_errno,
+ "Could not open '%s'", filename);
+}
+
+void error_append_hint(Error **errp, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno = errno;
+ Error *err;
+
+ if (!errp) {
+ return;
+ }
+ err = *errp;
+ assert(err && errp != &error_abort);
+
+ if (!err->hint) {
+ err->hint = g_string_new(NULL);
+ }
+ va_start(ap, fmt);
+ g_string_append_vprintf(err->hint, fmt, ap);
+ va_end(ap);
+
+ errno = saved_errno;
}
#ifdef _WIN32
-void error_set_win32(Error **errp, int win32_err, ErrorClass err_class,
- const char *fmt, ...)
+void error_setg_win32_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int win32_err, const char *fmt, ...)
{
- Error *err;
- char *msg1;
va_list ap;
+ char *msg1, *msg2;
if (errp == NULL) {
return;
}
- assert(*errp == NULL);
-
- err = g_malloc0(sizeof(*err));
va_start(ap, fmt);
- msg1 = g_strdup_vprintf(fmt, ap);
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+ va_end(ap);
+
if (win32_err != 0) {
- char *msg2 = g_win32_error_message(win32_err);
- err->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
- (unsigned)win32_err);
+ msg1 = (*errp)->msg;
+ msg2 = g_win32_error_message(win32_err);
+ (*errp)->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
+ (unsigned)win32_err);
g_free(msg2);
g_free(msg1);
- } else {
- err->msg = msg1;
- }
- va_end(ap);
- err->err_class = err_class;
-
- if (errp == &error_abort) {
- error_report_err(err);
- abort();
}
-
- *errp = err;
}
#endif
@@ -138,6 +180,12 @@ Error *error_copy(const Error *err)
err_new = g_malloc0(sizeof(*err));
err_new->msg = g_strdup(err->msg);
err_new->err_class = err->err_class;
+ err_new->src = err->src;
+ err_new->line = err->line;
+ err_new->func = err->func;
+ if (err->hint) {
+ err_new->hint = g_string_new(err->hint->str);
+ }
return err_new;
}
@@ -155,6 +203,9 @@ const char *error_get_pretty(Error *err)
void error_report_err(Error *err)
{
error_report("%s", error_get_pretty(err));
+ if (err->hint) {
+ error_printf_unless_qmp("%s\n", err->hint->str);
+ }
error_free(err);
}
@@ -162,18 +213,29 @@ void error_free(Error *err)
{
if (err) {
g_free(err->msg);
+ if (err->hint) {
+ g_string_free(err->hint, true);
+ }
g_free(err);
}
}
+void error_free_or_abort(Error **errp)
+{
+ assert(errp && *errp);
+ error_free(*errp);
+ *errp = NULL;
+}
+
void error_propagate(Error **dst_errp, Error *local_err)
{
- if (local_err && dst_errp == &error_abort) {
- error_report_err(local_err);
- abort();
- } else if (dst_errp && !*dst_errp) {
+ if (!local_err) {
+ return;
+ }
+ error_handle_fatal(dst_errp, local_err);
+ if (dst_errp && !*dst_errp) {
*dst_errp = local_err;
- } else if (local_err) {
+ } else {
error_free(local_err);
}
}
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index ed4ca2b01e..d4a0c63e12 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -77,7 +77,7 @@ void event_notifier_cleanup(EventNotifier *e)
close(e->wfd);
}
-int event_notifier_get_fd(EventNotifier *e)
+int event_notifier_get_fd(const EventNotifier *e)
{
return e->rfd;
}
diff --git a/util/id.c b/util/id.c
index 09b22fb8fa..7883fbec79 100644
--- a/util/id.c
+++ b/util/id.c
@@ -26,3 +26,40 @@ bool id_wellformed(const char *id)
}
return true;
}
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[ID_MAX] = {
+ [ID_QDEV] = "qdev",
+ [ID_BLOCK] = "block",
+};
+
+/*
+ * Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ * where:
+ *
+ * - PREFIX is the reserved character '#'
+ * - SUBSYSTEM identifies the subsystem creating the ID
+ * - NUMBER is a decimal number unique within SUBSYSTEM.
+ *
+ * Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+ static uint64_t id_counters[ID_MAX];
+ uint32_t rnd;
+
+ assert(id < ARRAY_SIZE(id_subsys_str));
+ assert(id_subsys_str[id]);
+
+ rnd = g_random_int_range(0, 100);
+
+ return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+ id_subsys_str[id],
+ id_counters[id]++,
+ rnd);
+}
diff --git a/util/memfd.c b/util/memfd.c
new file mode 100644
index 0000000000..7c406914c5
--- /dev/null
+++ b/util/memfd.c
@@ -0,0 +1,162 @@
+/*
+ * memfd.c
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include <glib.h>
+#include <glib/gprintf.h>
+
+#include <sys/mman.h>
+
+#include "qemu/memfd.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/memfd.h>
+#elif defined CONFIG_LINUX
+#include <sys/syscall.h>
+#include <asm/unistd.h>
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+#ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+#else
+ return -1;
+#endif
+}
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+/*
+ * This is a best-effort helper for shared memory allocation, with
+ * optional sealing. The helper will do his best to allocate using
+ * memfd with sealing, but may fallback on other methods without
+ * sealing.
+ */
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+ int *fd)
+{
+ void *ptr;
+ int mfd = -1;
+
+ *fd = -1;
+
+#ifdef CONFIG_LINUX
+ if (seals) {
+ mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ }
+
+ if (mfd == -1) {
+ /* some systems have memfd without sealing */
+ mfd = memfd_create(name, MFD_CLOEXEC);
+ seals = 0;
+ }
+#endif
+
+ if (mfd != -1) {
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return NULL;
+ }
+
+ if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+ perror("fcntl");
+ close(mfd);
+ return NULL;
+ }
+ } else {
+ const char *tmpdir = g_get_tmp_dir();
+ gchar *fname;
+
+ fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+ mfd = mkstemp(fname);
+ unlink(fname);
+ g_free(fname);
+
+ if (mfd == -1) {
+ perror("mkstemp");
+ return NULL;
+ }
+
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return NULL;
+ }
+ }
+
+ ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ close(mfd);
+ return NULL;
+ }
+
+ *fd = mfd;
+ return ptr;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+ if (ptr) {
+ munmap(ptr, size);
+ }
+
+ if (fd != -1) {
+ close(fd);
+ }
+}
+
+enum {
+ MEMFD_KO,
+ MEMFD_OK,
+ MEMFD_TODO
+};
+
+bool qemu_memfd_check(void)
+{
+ static int memfd_check = MEMFD_TODO;
+
+ if (memfd_check == MEMFD_TODO) {
+ int fd;
+ void *ptr;
+
+ ptr = qemu_memfd_alloc("test", 4096, 0, &fd);
+ memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
+ qemu_memfd_free(ptr, 4096, fd);
+ }
+
+ return memfd_check == MEMFD_OK;
+}
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
new file mode 100644
index 0000000000..54793a5dcf
--- /dev/null
+++ b/util/mmap-alloc.c
@@ -0,0 +1,110 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+ struct statfs fs;
+ int ret;
+
+ if (fd != -1) {
+ do {
+ ret = fstatfs(fd, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+ return fs.f_bsize;
+ }
+ }
+#endif
+
+ return getpagesize();
+}
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+ /*
+ * Note: this always allocates at least one extra page of virtual address
+ * space, even if size is already aligned.
+ */
+ size_t total = size + align;
+#if defined(__powerpc64__) && defined(__linux__)
+ /* On ppc64 mappings in the same segment (aka slice) must share the same
+ * page size. Since we will be re-allocating part of this segment
+ * from the supplied fd, we should make sure to use the same page size,
+ * unless we are using the system page size, in which case anonymous memory
+ * is OK. Use align as a hint for the page size.
+ * In this case, set MAP_NORESERVE to avoid allocating backing store memory.
+ */
+ int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
+ int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
+ void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
+#else
+ void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
+ size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+ void *ptr1;
+
+ if (ptr == MAP_FAILED) {
+ return MAP_FAILED;
+ }
+
+ /* Make sure align is a power of 2 */
+ assert(!(align & (align - 1)));
+ /* Always align to host page size */
+ assert(align >= getpagesize());
+
+ ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED |
+ (fd == -1 ? MAP_ANONYMOUS : 0) |
+ (shared ? MAP_SHARED : MAP_PRIVATE),
+ fd, 0);
+ if (ptr1 == MAP_FAILED) {
+ munmap(ptr, total);
+ return MAP_FAILED;
+ }
+
+ ptr += offset;
+ total -= offset;
+
+ if (offset > 0) {
+ munmap(ptr - offset, offset);
+ }
+
+ /*
+ * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+ * a guard page guarding against potential buffer overflows.
+ */
+ if (total > size + getpagesize()) {
+ munmap(ptr + size + getpagesize(), total - size - getpagesize());
+ }
+
+ return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+ if (ptr) {
+ /* Unmap both the RAM block and the guard page */
+ munmap(ptr, size + getpagesize());
+ }
+}
diff --git a/util/osdep.c b/util/osdep.c
index 0092bb61b9..534b51147c 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -52,7 +52,14 @@ extern int madvise(caddr_t, size_t, int);
static bool fips_enabled = false;
-static const char *qemu_version = QEMU_VERSION;
+/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
+ * instead of QEMU_VERSION, so setting hw_version on MachineClass
+ * is no longer mandatory.
+ *
+ * Do NOT change this string, or it will break compatibility on all
+ * machine classes that don't set hw_version.
+ */
+static const char *hw_version = "2.5+";
int socket_set_cork(int fd, int v)
{
@@ -311,14 +318,14 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
return ret;
}
-void qemu_set_version(const char *version)
+void qemu_set_hw_version(const char *version)
{
- qemu_version = version;
+ hw_version = version;
}
-const char *qemu_get_version(void)
+const char *qemu_hw_version(void)
{
- return qemu_version;
+ return hw_version;
}
void fips_set_state(bool requested)
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 997f24d21b..6f18866cad 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -46,7 +46,6 @@ extern int daemon(int, int);
#else
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
-#define HUGETLBFS_MAGIC 0x958458f6
#include <termios.h>
#include <unistd.h>
@@ -66,13 +65,14 @@ extern int daemon(int, int);
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
-#include <sys/vfs.h>
#endif
#ifdef __FreeBSD__
#include <sys/sysctl.h>
#endif
+#include <qemu/mmap-alloc.h>
+
#ifdef CONFIG_MARU
#include "../../tizen/src/emulator_common.h"
#endif
@@ -148,10 +148,7 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
}
#endif
size_t align = QEMU_VMALLOC_ALIGN;
- size_t total = size + align - getpagesize();
- void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+ void *ptr = qemu_ram_mmap(-1, size, align, false);
if (ptr == MAP_FAILED) {
return NULL;
@@ -160,15 +157,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
if (alignment) {
*alignment = align;
}
- ptr += offset;
- total -= offset;
-
- if (offset > 0) {
- munmap(ptr - offset, offset);
- }
- if (total > size) {
- munmap(ptr + size, total - size);
- }
trace_qemu_anon_ram_alloc(size, ptr);
return ptr;
@@ -183,9 +171,7 @@ void qemu_vfree(void *ptr)
void qemu_anon_ram_free(void *ptr, size_t size)
{
trace_qemu_anon_ram_free(ptr, size);
- if (ptr) {
- munmap(ptr, size);
- }
+ qemu_ram_munmap(ptr, size);
}
void qemu_set_block(int fd)
@@ -372,26 +358,6 @@ static void sigbus_handler(int signal)
siglongjmp(sigjump, 1);
}
-static size_t fd_getpagesize(int fd)
-{
-#ifdef CONFIG_LINUX
- struct statfs fs;
- int ret;
-
- if (fd != -1) {
- do {
- ret = fstatfs(fd, &fs);
- } while (ret != 0 && errno == EINTR);
-
- if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
- return fs.f_bsize;
- }
- }
-#endif
-
- return getpagesize();
-}
-
void os_mem_prealloc(int fd, char *area, size_t memory)
{
int ret;
@@ -419,7 +385,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory)
exit(1);
} else {
int i;
- size_t hpagesize = fd_getpagesize(fd);
+ size_t hpagesize = qemu_fd_getpagesize(fd);
size_t numpages = DIV_ROUND_UP(memory, hpagesize);
/* MAP_POPULATE silently ignores failures */
@@ -502,3 +468,74 @@ int qemu_read_password(char *buf, int buf_size)
printf("\n");
return ret;
}
+
+
+pid_t qemu_fork(Error **errp)
+{
+ sigset_t oldmask, newmask;
+ struct sigaction sig_action;
+ int saved_errno;
+ pid_t pid;
+
+ /*
+ * Need to block signals now, so that child process can safely
+ * kill off caller's signal handlers without a race.
+ */
+ sigfillset(&newmask);
+ if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
+ error_setg_errno(errp, errno,
+ "cannot block signals");
+ return -1;
+ }
+
+ pid = fork();
+ saved_errno = errno;
+
+ if (pid < 0) {
+ /* attempt to restore signal mask, but ignore failure, to
+ * avoid obscuring the fork failure */
+ (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ error_setg_errno(errp, saved_errno,
+ "cannot fork child process");
+ errno = saved_errno;
+ return -1;
+ } else if (pid) {
+ /* parent process */
+
+ /* Restore our original signal mask now that the child is
+ * safely running. Only documented failures are EFAULT (not
+ * possible, since we are using just-grabbed mask) or EINVAL
+ * (not possible, since we are using correct arguments). */
+ (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ } else {
+ /* child process */
+ size_t i;
+
+ /* Clear out all signal handlers from parent so nothing
+ * unexpected can happen in our child once we unblock
+ * signals */
+ sig_action.sa_handler = SIG_DFL;
+ sig_action.sa_flags = 0;
+ sigemptyset(&sig_action.sa_mask);
+
+ for (i = 1; i < NSIG; i++) {
+ /* Only possible errors are EFAULT or EINVAL The former
+ * won't happen, the latter we expect, so no need to check
+ * return value */
+ (void)sigaction(i, &sig_action, NULL);
+ }
+
+ /* Unmask all signals in child, since we've no idea what the
+ * caller's done with their signal mask and don't want to
+ * propagate that to children */
+ sigemptyset(&newmask);
+ if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
+ Error *local_err = NULL;
+ error_setg_errno(&local_err, errno,
+ "cannot unblock signals");
+ error_report_err(local_err);
+ _exit(1);
+ }
+ }
+ return pid;
+}
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 217f28a201..11f528a4df 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -110,9 +110,7 @@ void qemu_anon_ram_free(void *ptr, size_t size)
}
}
-// CONFIG_MARU MODIFICATION
-// recent MinGW-w64 provides gmtime_r(), localtime_r(). we use them.
-#if 0
+#ifndef CONFIG_LOCALTIME_R
/* FIXME: add proper locking */
struct tm *gmtime_r(const time_t *timep, struct tm *result)
{
@@ -136,7 +134,7 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
}
return p;
}
-#endif
+#endif /* CONFIG_LOCALTIME_R */
void qemu_set_block(int fd)
{
@@ -471,7 +469,7 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout)
return retval;
}
-size_t getpagesize(void)
+int getpagesize(void)
{
SYSTEM_INFO system_info;
@@ -513,3 +511,12 @@ int qemu_read_password(char *buf, int buf_size)
buf[i] = '\0';
return 0;
}
+
+
+pid_t qemu_fork(Error **errp)
+{
+ errno = ENOSYS;
+ error_setg_errno(errp, errno,
+ "cannot fork child process");
+ return -1;
+}
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
new file mode 100644
index 0000000000..e1eae7331e
--- /dev/null
+++ b/util/qemu-coroutine-io.c
@@ -0,0 +1,91 @@
+/*
+ * Coroutine-aware I/O functions
+ *
+ * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
+ * Copyright (c) 2011, Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send)
+{
+ size_t done = 0;
+ ssize_t ret;
+ int err;
+ while (done < bytes) {
+ ret = iov_send_recv(sockfd, iov, iov_cnt,
+ offset + done, bytes - done, do_send);
+ if (ret > 0) {
+ done += ret;
+ } else if (ret < 0) {
+ err = socket_error();
+ if (err == EAGAIN || err == EWOULDBLOCK) {
+ qemu_coroutine_yield();
+ } else if (done == 0) {
+ return -err;
+ } else {
+ break;
+ }
+ } else if (ret == 0 && !do_send) {
+ /* write (send) should never return 0.
+ * read (recv) returns 0 for end-of-file (-data).
+ * In both cases there's little point retrying,
+ * but we do for write anyway, just in case */
+ break;
+ }
+ }
+ return done;
+}
+
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
+{
+ struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+ return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
+}
+
+typedef struct {
+ Coroutine *co;
+ int fd;
+} FDYieldUntilData;
+
+static void fd_coroutine_enter(void *opaque)
+{
+ FDYieldUntilData *data = opaque;
+ qemu_set_fd_handler(data->fd, NULL, NULL, NULL);
+ qemu_coroutine_enter(data->co, NULL);
+}
+
+void coroutine_fn yield_until_fd_readable(int fd)
+{
+ FDYieldUntilData data;
+
+ assert(qemu_in_coroutine());
+ data.co = qemu_coroutine_self();
+ data.fd = fd;
+ qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data);
+ qemu_coroutine_yield();
+}
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
new file mode 100644
index 0000000000..130ee19d17
--- /dev/null
+++ b/util/qemu-coroutine-lock.c
@@ -0,0 +1,186 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+ QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+ Coroutine *self = qemu_coroutine_self();
+ QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ qemu_coroutine_yield();
+ assert(qemu_in_coroutine());
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+ Coroutine *next;
+
+ trace_qemu_co_queue_run_restart(co);
+ while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
+ QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *next;
+
+ if (QTAILQ_EMPTY(&queue->entries)) {
+ return false;
+ }
+
+ while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
+ trace_qemu_co_queue_next(next);
+ if (single) {
+ break;
+ }
+ }
+ return true;
+}
+
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
+{
+ assert(qemu_in_coroutine());
+ return qemu_co_queue_do_restart(queue, true);
+}
+
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
+{
+ assert(qemu_in_coroutine());
+ qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_enter_next(CoQueue *queue)
+{
+ Coroutine *next;
+
+ next = QTAILQ_FIRST(&queue->entries);
+ if (!next) {
+ return false;
+ }
+
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ return true;
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+ return QTAILQ_FIRST(&queue->entries) == NULL;
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+ memset(mutex, 0, sizeof(*mutex));
+ qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_lock_entry(mutex, self);
+
+ while (mutex->locked) {
+ qemu_co_queue_wait(&mutex->queue);
+ }
+
+ mutex->locked = true;
+
+ trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+ assert(mutex->locked == true);
+ assert(qemu_in_coroutine());
+
+ mutex->locked = false;
+ qemu_co_queue_next(&mutex->queue);
+
+ trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+ memset(lock, 0, sizeof(*lock));
+ qemu_co_queue_init(&lock->queue);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+ while (lock->writer) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->reader++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+ assert(qemu_in_coroutine());
+ if (lock->writer) {
+ lock->writer = false;
+ qemu_co_queue_restart_all(&lock->queue);
+ } else {
+ lock->reader--;
+ assert(lock->reader >= 0);
+ /* Wakeup only one waiting writer */
+ if (!lock->reader) {
+ qemu_co_queue_next(&lock->queue);
+ }
+ }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+ while (lock->writer || lock->reader) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->writer = true;
+}
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
new file mode 100644
index 0000000000..b35db56356
--- /dev/null
+++ b/util/qemu-coroutine-sleep.c
@@ -0,0 +1,41 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/coroutine.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+typedef struct CoSleepCB {
+ QEMUTimer *ts;
+ Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+ CoSleepCB *sleep_cb = opaque;
+
+ qemu_coroutine_enter(sleep_cb->co, NULL);
+}
+
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+ int64_t ns)
+{
+ CoSleepCB sleep_cb = {
+ .co = qemu_coroutine_self(),
+ };
+ sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
+ timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
+ qemu_coroutine_yield();
+ timer_del(sleep_cb.ts);
+ timer_free(sleep_cb.ts);
+}
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
new file mode 100644
index 0000000000..9a04f28865
--- /dev/null
+++ b/util/qemu-coroutine.c
@@ -0,0 +1,150 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+
+enum {
+#if defined(_WIN32) && !defined(_WIN64)
+ POOL_BATCH_SIZE = 32,
+#else
+ POOL_BATCH_SIZE = 64,
+#endif
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+ Coroutine *co;
+ Coroutine *tmp;
+
+ QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+ QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ qemu_coroutine_delete(co);
+ }
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+ Coroutine *co = NULL;
+
+ if (CONFIG_COROUTINE_POOL) {
+ co = QSLIST_FIRST(&alloc_pool);
+ if (!co) {
+ if (release_pool_size > POOL_BATCH_SIZE) {
+ /* Slow path; a good place to register the destructor, too. */
+ if (!coroutine_pool_cleanup_notifier.notify) {
+ coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+ qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+ }
+
+ /* This is not exact; there could be a little skew between
+ * release_pool_size and the actual size of release_pool. But
+ * it is just a heuristic, it does not need to be perfect.
+ */
+ alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+ QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+ co = QSLIST_FIRST(&alloc_pool);
+ }
+ }
+ if (co) {
+ QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ alloc_pool_size--;
+ }
+ }
+
+ if (!co) {
+ co = qemu_coroutine_new();
+ }
+
+ co->entry = entry;
+ QTAILQ_INIT(&co->co_queue_wakeup);
+ return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+ co->caller = NULL;
+
+ if (CONFIG_COROUTINE_POOL) {
+ if (release_pool_size < POOL_BATCH_SIZE * 2) {
+ QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+ atomic_inc(&release_pool_size);
+ return;
+ }
+ if (alloc_pool_size < POOL_BATCH_SIZE) {
+ QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+ alloc_pool_size++;
+ return;
+ }
+ }
+
+ qemu_coroutine_delete(co);
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+ Coroutine *self = qemu_coroutine_self();
+ CoroutineAction ret;
+
+ trace_qemu_coroutine_enter(self, co, opaque);
+
+ if (co->caller) {
+ fprintf(stderr, "Co-routine re-entered recursively\n");
+ abort();
+ }
+
+ co->caller = self;
+ co->entry_arg = opaque;
+ ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+
+ qemu_co_queue_run_restart(co);
+
+ switch (ret) {
+ case COROUTINE_YIELD:
+ return;
+ case COROUTINE_TERMINATE:
+ trace_qemu_coroutine_terminate(co);
+ coroutine_delete(co);
+ return;
+ default:
+ abort();
+ }
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *to = self->caller;
+
+ trace_qemu_coroutine_yield(self, to);
+
+ if (!to) {
+ fprintf(stderr, "Co-routine is yielding to no one\n");
+ abort();
+ }
+
+ self->caller = NULL;
+ qemu_coroutine_switch(self, to, COROUTINE_YIELD);
+}
diff --git a/util/qemu-error.c b/util/qemu-error.c
index 050d5804c8..d9cc6337c0 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -238,7 +238,7 @@ void error_vreport(const char *fmt, va_list ap)
GTimeVal tv;
gchar *timestr;
- if (enable_timestamp_msg) {
+ if (enable_timestamp_msg && !cur_mon) {
g_get_current_time(&tv);
timestr = g_time_val_to_iso8601(&tv);
error_printf("%s ", timestr);
diff --git a/util/qemu-option.c b/util/qemu-option.c
index efe9d279c4..a50eceae4a 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -180,6 +180,11 @@ void parse_option_size(const char *name, const char *value,
if (value != NULL) {
sizef = strtod(value, &postfix);
+ if (sizef < 0 || sizef > UINT64_MAX) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+ "a non-negative number below 2^64");
+ return;
+ }
switch (*postfix) {
case 'T':
sizef *= 1024;
@@ -200,10 +205,8 @@ void parse_option_size(const char *name, const char *value,
break;
default:
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
-#if 0 /* conversion from qerror_report() to error_set() broke this: */
- error_printf_unless_qmp("You may use k, M, G or T suffixes for "
- "kilobytes, megabytes, gigabytes and terabytes.\n");
-#endif
+ error_append_hint(errp, "You may use k, M, G or T suffixes for "
+ "kilobytes, megabytes, gigabytes and terabytes.");
return;
}
} else {
@@ -643,9 +646,8 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
if (!id_wellformed(id)) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
"an identifier");
-#if 0 /* conversion from qerror_report() to error_set() broke this: */
- error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n");
-#endif
+ error_append_hint(errp, "Identifiers consist of letters, digits, "
+ "'-', '.', '_', starting with a letter.");
return NULL;
}
opts = qemu_opts_find(list, id);
@@ -730,14 +732,35 @@ void qemu_opts_del(QemuOpts *opts)
g_free(opts);
}
-void qemu_opts_print(QemuOpts *opts, const char *sep)
+/* print value, escaping any commas in value */
+static void escaped_print(const char *value)
+{
+ const char *ptr;
+
+ for (ptr = value; *ptr; ++ptr) {
+ if (*ptr == ',') {
+ putchar(',');
+ }
+ putchar(*ptr);
+ }
+}
+
+void qemu_opts_print(QemuOpts *opts, const char *separator)
{
QemuOpt *opt;
QemuOptDesc *desc = opts->list->desc;
+ const char *sep = "";
+
+ if (opts->id) {
+ printf("id=%s", opts->id); /* passed id_wellformed -> no commas */
+ sep = separator;
+ }
if (desc[0].name == NULL) {
QTAILQ_FOREACH(opt, &opts->head, next) {
- printf("%s%s=\"%s\"", sep, opt->name, opt->str);
+ printf("%s%s=", sep, opt->name);
+ escaped_print(opt->str);
+ sep = separator;
}
return;
}
@@ -750,13 +773,15 @@ void qemu_opts_print(QemuOpts *opts, const char *sep)
continue;
}
if (desc->type == QEMU_OPT_STRING) {
- printf("%s%s='%s'", sep, desc->name, value);
+ printf("%s%s=", sep, desc->name);
+ escaped_print(value);
} else if ((desc->type == QEMU_OPT_SIZE ||
desc->type == QEMU_OPT_NUMBER) && opt) {
printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint);
} else {
printf("%s%s=%s", sep, desc->name, value);
}
+ sep = separator;
}
}
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index a3d0be6d28..a96a8d7960 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -25,6 +25,9 @@
#include "monitor/monitor.h"
#include "qemu/sockets.h"
#include "qemu/main-loop.h"
+#include "qapi/qmp-input-visitor.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qapi-visit.h"
#ifndef AI_ADDRCONFIG
# define AI_ADDRCONFIG 0
@@ -125,12 +128,15 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
ai.ai_family = PF_UNSPEC;
ai.ai_socktype = SOCK_STREAM;
- if ((qemu_opt_get(opts, "host") == NULL) ||
- (qemu_opt_get(opts, "port") == NULL)) {
- error_setg(errp, "host and/or port not specified");
+ if ((qemu_opt_get(opts, "host") == NULL)) {
+ error_setg(errp, "host not specified");
return -1;
}
- pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+ if (qemu_opt_get(opts, "port") != NULL) {
+ pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+ } else {
+ port[0] = '\0';
+ }
addr = qemu_opt_get(opts, "host");
to = qemu_opt_get_number(opts, "to", 0);
@@ -142,6 +148,10 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
/* lookup */
if (port_offset) {
unsigned long long baseport;
+ if (strlen(port) == 0) {
+ error_setg(errp, "port not specified");
+ return -1;
+ }
if (parse_uint_full(port, &baseport, 10) < 0) {
error_setg(errp, "can't convert to a number: %s", port);
return -1;
@@ -153,7 +163,8 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
}
snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
}
- rc = getaddrinfo(strlen(addr) ? addr : NULL, port, &ai, &res);
+ rc = getaddrinfo(strlen(addr) ? addr : NULL,
+ strlen(port) ? port : NULL, &ai, &res);
if (rc != 0) {
error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
gai_strerror(rc));
@@ -594,12 +605,15 @@ fail:
static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr)
{
- bool ipv4 = addr->ipv4 || !addr->has_ipv4;
- bool ipv6 = addr->ipv6 || !addr->has_ipv6;
+ bool ipv4 = addr->has_ipv4 && addr->ipv4;
+ bool ipv6 = addr->has_ipv6 && addr->ipv6;
- if (!ipv4 || !ipv6) {
+ if (ipv4 || ipv6) {
qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort);
qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort);
+ } else if (addr->has_ipv4 || addr->has_ipv6) {
+ qemu_opt_set_bool(opts, "ipv4", !addr->has_ipv4, &error_abort);
+ qemu_opt_set_bool(opts, "ipv6", !addr->has_ipv6, &error_abort);
}
if (addr->has_to) {
qemu_opt_set_number(opts, "to", addr->to, &error_abort);
@@ -745,8 +759,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
qemu_opt_set(opts, "path", un.sun_path, &error_abort);
}
- if ((access(un.sun_path, F_OK) == 0) &&
- unlink(un.sun_path) < 0) {
+ if (unlink(un.sun_path) < 0 && errno != ENOENT) {
error_setg_errno(errp, errno,
"Failed to unlink socket %s", un.sun_path);
goto err;
@@ -912,23 +925,23 @@ SocketAddress *socket_parse(const char *str, Error **errp)
error_setg(errp, "invalid Unix socket address");
goto fail;
} else {
- addr->kind = SOCKET_ADDRESS_KIND_UNIX;
- addr->q_unix = g_new(UnixSocketAddress, 1);
- addr->q_unix->path = g_strdup(str + 5);
+ addr->type = SOCKET_ADDRESS_KIND_UNIX;
+ addr->u.q_unix = g_new(UnixSocketAddress, 1);
+ addr->u.q_unix->path = g_strdup(str + 5);
}
} else if (strstart(str, "fd:", NULL)) {
if (str[3] == '\0') {
error_setg(errp, "invalid file descriptor address");
goto fail;
} else {
- addr->kind = SOCKET_ADDRESS_KIND_FD;
- addr->fd = g_new(String, 1);
- addr->fd->str = g_strdup(str + 3);
+ addr->type = SOCKET_ADDRESS_KIND_FD;
+ addr->u.fd = g_new(String, 1);
+ addr->u.fd->str = g_strdup(str + 3);
}
} else {
- addr->kind = SOCKET_ADDRESS_KIND_INET;
- addr->inet = inet_parse(str, errp);
- if (addr->inet == NULL) {
+ addr->type = SOCKET_ADDRESS_KIND_INET;
+ addr->u.inet = inet_parse(str, errp);
+ if (addr->u.inet == NULL) {
goto fail;
}
}
@@ -946,19 +959,19 @@ int socket_connect(SocketAddress *addr, Error **errp,
int fd;
opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
- switch (addr->kind) {
+ switch (addr->type) {
case SOCKET_ADDRESS_KIND_INET:
- inet_addr_to_opts(opts, addr->inet);
+ inet_addr_to_opts(opts, addr->u.inet);
fd = inet_connect_opts(opts, errp, callback, opaque);
break;
case SOCKET_ADDRESS_KIND_UNIX:
- qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort);
+ qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
fd = unix_connect_opts(opts, errp, callback, opaque);
break;
case SOCKET_ADDRESS_KIND_FD:
- fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+ fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
if (fd >= 0 && callback) {
qemu_set_nonblock(fd);
callback(fd, NULL, opaque);
@@ -978,19 +991,19 @@ int socket_listen(SocketAddress *addr, Error **errp)
int fd;
opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
- switch (addr->kind) {
+ switch (addr->type) {
case SOCKET_ADDRESS_KIND_INET:
- inet_addr_to_opts(opts, addr->inet);
+ inet_addr_to_opts(opts, addr->u.inet);
fd = inet_listen_opts(opts, 0, errp);
break;
case SOCKET_ADDRESS_KIND_UNIX:
- qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort);
+ qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
fd = unix_listen_opts(opts, errp);
break;
case SOCKET_ADDRESS_KIND_FD:
- fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+ fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
break;
default:
@@ -1006,12 +1019,12 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
int fd;
opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
- switch (remote->kind) {
+ switch (remote->type) {
case SOCKET_ADDRESS_KIND_INET:
- inet_addr_to_opts(opts, remote->inet);
+ inet_addr_to_opts(opts, remote->u.inet);
if (local) {
- qemu_opt_set(opts, "localaddr", local->inet->host, &error_abort);
- qemu_opt_set(opts, "localport", local->inet->port, &error_abort);
+ qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort);
+ qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort);
}
fd = inet_dgram_opts(opts, errp);
break;
@@ -1023,3 +1036,140 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
qemu_opts_del(opts);
return fd;
}
+
+
+static SocketAddress *
+socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ char host[NI_MAXHOST];
+ char serv[NI_MAXSERV];
+ SocketAddress *addr;
+ int ret;
+
+ ret = getnameinfo((struct sockaddr *)sa, salen,
+ host, sizeof(host),
+ serv, sizeof(serv),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0) {
+ error_setg(errp, "Cannot format numeric socket address: %s",
+ gai_strerror(ret));
+ return NULL;
+ }
+
+ addr = g_new0(SocketAddress, 1);
+ addr->type = SOCKET_ADDRESS_KIND_INET;
+ addr->u.inet = g_new0(InetSocketAddress, 1);
+ addr->u.inet->host = g_strdup(host);
+ addr->u.inet->port = g_strdup(serv);
+ if (sa->ss_family == AF_INET) {
+ addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true;
+ } else {
+ addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true;
+ }
+
+ return addr;
+}
+
+
+#ifndef WIN32
+static SocketAddress *
+socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ SocketAddress *addr;
+ struct sockaddr_un *su = (struct sockaddr_un *)sa;
+
+ addr = g_new0(SocketAddress, 1);
+ addr->type = SOCKET_ADDRESS_KIND_UNIX;
+ addr->u.q_unix = g_new0(UnixSocketAddress, 1);
+ if (su->sun_path[0]) {
+ addr->u.q_unix->path = g_strndup(su->sun_path,
+ sizeof(su->sun_path));
+ }
+
+ return addr;
+}
+#endif /* WIN32 */
+
+static SocketAddress *
+socket_sockaddr_to_address(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ switch (sa->ss_family) {
+ case AF_INET:
+ case AF_INET6:
+ return socket_sockaddr_to_address_inet(sa, salen, errp);
+
+#ifndef WIN32
+ case AF_UNIX:
+ return socket_sockaddr_to_address_unix(sa, salen, errp);
+#endif /* WIN32 */
+
+ default:
+ error_setg(errp, "socket family %d unsupported",
+ sa->ss_family);
+ return NULL;
+ }
+ return 0;
+}
+
+
+SocketAddress *socket_local_address(int fd, Error **errp)
+{
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+
+ if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+ error_setg_errno(errp, socket_error(), "%s",
+ "Unable to query local socket address");
+ return NULL;
+ }
+
+ return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_remote_address(int fd, Error **errp)
+{
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+
+ if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+ error_setg_errno(errp, socket_error(), "%s",
+ "Unable to query remote socket address");
+ return NULL;
+ }
+
+ return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+void qapi_copy_SocketAddress(SocketAddress **p_dest,
+ SocketAddress *src)
+{
+ QmpOutputVisitor *qov;
+ QmpInputVisitor *qiv;
+ Visitor *ov, *iv;
+ QObject *obj;
+
+ *p_dest = NULL;
+
+ qov = qmp_output_visitor_new();
+ ov = qmp_output_get_visitor(qov);
+ visit_type_SocketAddress(ov, &src, NULL, &error_abort);
+ obj = qmp_output_get_qobject(qov);
+ qmp_output_visitor_cleanup(qov);
+ if (!obj) {
+ return;
+ }
+
+ qiv = qmp_input_visitor_new(obj);
+ iv = qmp_input_get_visitor(qiv);
+ visit_type_SocketAddress(iv, p_dest, NULL, &error_abort);
+ qmp_input_visitor_cleanup(qiv);
+ qobject_decref(obj);
+}
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 23f93b73b1..dde2a213cd 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -298,7 +298,16 @@ static inline void futex_wake(QemuEvent *ev, int n)
static inline void futex_wait(QemuEvent *ev, unsigned val)
{
- futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+ while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+ switch (errno) {
+ case EWOULDBLOCK:
+ return;
+ case EINTR:
+ break; /* get out of switch and retry */
+ default:
+ abort();
+ }
+ }
}
#else
static inline void futex_wake(QemuEvent *ev, int n)
@@ -389,7 +398,7 @@ void qemu_event_wait(QemuEvent *ev)
/*
* Leave the event reset and tell qemu_event_set that there
* are waiters. No need to retry, because there cannot be
- * a concurent busy->free transition. After the CAS, the
+ * a concurrent busy->free transition. After the CAS, the
* event will be either set or busy.
*/
if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 406b52f91d..6cdd553e9a 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -238,10 +238,34 @@ void qemu_sem_wait(QemuSemaphore *sem)
}
}
+/* Wrap a Win32 manual-reset event with a fast userspace path. The idea
+ * is to reset the Win32 event lazily, as part of a test-reset-test-wait
+ * sequence. Such a sequence is, indeed, how QemuEvents are used by
+ * RCU and other subsystems!
+ *
+ * Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy (and is faster than cmpxchg).
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
void qemu_event_init(QemuEvent *ev, bool init)
{
/* Manual reset. */
- ev->event = CreateEvent(NULL, TRUE, init, NULL);
+ ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
+ ev->value = (init ? EV_SET : EV_FREE);
}
void qemu_event_destroy(QemuEvent *ev)
@@ -251,17 +275,51 @@ void qemu_event_destroy(QemuEvent *ev)
void qemu_event_set(QemuEvent *ev)
{
- SetEvent(ev->event);
+ if (atomic_mb_read(&ev->value) != EV_SET) {
+ if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ SetEvent(ev->event);
+ }
+ }
}
void qemu_event_reset(QemuEvent *ev)
{
- ResetEvent(ev->event);
+ if (atomic_mb_read(&ev->value) == EV_SET) {
+ /* If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ atomic_or(&ev->value, EV_FREE);
+ }
}
void qemu_event_wait(QemuEvent *ev)
{
- WaitForSingleObject(ev->event, INFINITE);
+ unsigned value;
+
+ value = atomic_mb_read(&ev->value);
+ if (value != EV_SET) {
+ if (value == EV_FREE) {
+ /* qemu_event_set is not yet going to call SetEvent, but we are
+ * going to do another check for EV_SET below when setting EV_BUSY.
+ * At that point it is safe to call WaitForSingleObject.
+ */
+ ResetEvent(ev->event);
+
+ /* Tell qemu_event_set that there are waiters. No need to retry
+ * because there cannot be a concurent busy->free transition.
+ * After the CAS, the event will be either set or busy.
+ */
+ if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ value = EV_SET;
+ } else {
+ value = EV_BUSY;
+ }
+ }
+ if (value == EV_BUSY) {
+ WaitForSingleObject(ev->event, INFINITE);
+ }
+ }
}
struct QemuThreadData {
diff --git a/util/rcu.c b/util/rcu.c
index cdcad678b4..8ba304dc44 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -47,7 +47,8 @@
unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
QemuEvent rcu_gp_event;
-static QemuMutex rcu_gp_lock;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
/*
* Check whether a quiescent state was crossed between the beginning of
@@ -66,7 +67,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
*/
__thread struct rcu_reader_data rcu_reader;
-/* Protected by rcu_gp_lock. */
+/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
@@ -114,10 +115,26 @@ static void wait_for_readers(void)
break;
}
- /* Wait for one thread to report a quiescent state and
- * try again.
+ /* Wait for one thread to report a quiescent state and try again.
+ * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+ * wait too much time.
+ *
+ * rcu_register_thread() may add nodes to &registry; it will not
+ * wake up synchronize_rcu, but that is okay because at least another
+ * thread must exit its RCU read-side critical section before
+ * synchronize_rcu is done. The next iteration of the loop will
+ * move the new thread's rcu_reader from &registry to &qsreaders,
+ * because rcu_gp_ongoing() will return false.
+ *
+ * rcu_unregister_thread() may remove nodes from &qsreaders instead
+ * of &registry if it runs during qemu_event_wait. That's okay;
+ * the node then will not be added back to &registry by QLIST_SWAP
+ * below. The invariant is that the node is part of one list when
+ * rcu_registry_lock is released.
*/
+ qemu_mutex_unlock(&rcu_registry_lock);
qemu_event_wait(&rcu_gp_event);
+ qemu_mutex_lock(&rcu_registry_lock);
}
/* put back the reader list in the registry */
@@ -126,7 +143,8 @@ static void wait_for_readers(void)
void synchronize_rcu(void)
{
- qemu_mutex_lock(&rcu_gp_lock);
+ qemu_mutex_lock(&rcu_sync_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
if (!QLIST_EMPTY(&registry)) {
/* In either case, the atomic_mb_set below blocks stores that free
@@ -149,7 +167,8 @@ void synchronize_rcu(void)
wait_for_readers();
}
- qemu_mutex_unlock(&rcu_gp_lock);
+ qemu_mutex_unlock(&rcu_registry_lock);
+ qemu_mutex_unlock(&rcu_sync_lock);
}
@@ -273,23 +292,24 @@ void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
void rcu_register_thread(void)
{
assert(rcu_reader.ctr == 0);
- qemu_mutex_lock(&rcu_gp_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
- qemu_mutex_unlock(&rcu_gp_lock);
+ qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
- qemu_mutex_lock(&rcu_gp_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
QLIST_REMOVE(&rcu_reader, node);
- qemu_mutex_unlock(&rcu_gp_lock);
+ qemu_mutex_unlock(&rcu_registry_lock);
}
static void rcu_init_complete(void)
{
QemuThread thread;
- qemu_mutex_init(&rcu_gp_lock);
+ qemu_mutex_init(&rcu_registry_lock);
+ qemu_mutex_init(&rcu_sync_lock);
qemu_event_init(&rcu_gp_event, true);
qemu_event_init(&rcu_call_ready_event, false);
@@ -306,12 +326,14 @@ static void rcu_init_complete(void)
#ifdef CONFIG_POSIX
static void rcu_init_lock(void)
{
- qemu_mutex_lock(&rcu_gp_lock);
+ qemu_mutex_lock(&rcu_sync_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
}
static void rcu_init_unlock(void)
{
- qemu_mutex_unlock(&rcu_gp_lock);
+ qemu_mutex_unlock(&rcu_registry_lock);
+ qemu_mutex_unlock(&rcu_sync_lock);
}
#endif
diff --git a/util/throttle.c b/util/throttle.c
index 706c13111e..1113671ecf 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -300,6 +300,21 @@ bool throttle_is_valid(ThrottleConfig *cfg)
return !invalid;
}
+/* check if bps_max/iops_max is used without bps/iops
+ * @cfg: the throttling configuration to inspect
+ */
+bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
+{
+ int i;
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
+ return true;
+ }
+ }
+ return false;
+}
+
/* fix bucket parameters */
static void throttle_fix_bucket(LeakyBucket *bkt)
{
diff --git a/util/timed-average.c b/util/timed-average.c
new file mode 100644
index 0000000000..a2dfb4834d
--- /dev/null
+++ b/util/timed-average.c
@@ -0,0 +1,231 @@
+/*
+ * QEMU timed average computation
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ * BenoƮt Canet <benoit.canet@nodalink.com>
+ * Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free sofware: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Sofware Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+#include "qemu/timed-average.h"
+
+/* This module computes an average of a set of values within a time
+ * window.
+ *
+ * Algorithm:
+ *
+ * - Create two windows with a certain expiration period, and
+ * offsetted by period / 2.
+ * - Each time you want to account a new value, do it in both windows.
+ * - The minimum / maximum / average values are always returned from
+ * the oldest window.
+ *
+ * Example:
+ *
+ * t=0 |t=0.5 |t=1 |t=1.5 |t=2
+ * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) |
+ * wnd1: [0,1) | |wnd1: [1,2) | |
+ *
+ * Values are returned from:
+ *
+ * wnd0---------|wnd1------------|wnd0---------|wnd1-------------|
+ */
+
+/* Update the expiration of a time window
+ *
+ * @w: the window used
+ * @now: the current time in nanoseconds
+ * @period: the expiration period in nanoseconds
+ */
+static void update_expiration(TimedAverageWindow *w, int64_t now,
+ int64_t period)
+{
+ /* time elapsed since the last theoretical expiration */
+ int64_t elapsed = (now - w->expiration) % period;
+ /* time remaininging until the next expiration */
+ int64_t remaining = period - elapsed;
+ /* compute expiration */
+ w->expiration = now + remaining;
+}
+
+/* Reset a window
+ *
+ * @w: the window to reset
+ */
+static void window_reset(TimedAverageWindow *w)
+{
+ w->min = UINT64_MAX;
+ w->max = 0;
+ w->sum = 0;
+ w->count = 0;
+}
+
+/* Get the current window (that is, the one with the earliest
+ * expiration time).
+ *
+ * @ta: the TimedAverage structure
+ * @ret: a pointer to the current window
+ */
+static TimedAverageWindow *current_window(TimedAverage *ta)
+{
+ return &ta->windows[ta->current];
+}
+
+/* Initialize a TimedAverage structure
+ *
+ * @ta: the TimedAverage structure
+ * @clock_type: the type of clock to use
+ * @period: the time window period in nanoseconds
+ */
+void timed_average_init(TimedAverage *ta, QEMUClockType clock_type,
+ uint64_t period)
+{
+ int64_t now = qemu_clock_get_ns(clock_type);
+
+ /* Returned values are from the oldest window, so they belong to
+ * the interval [ta->period/2,ta->period). By adjusting the
+ * requested period by 4/3, we guarantee that they're in the
+ * interval [2/3 period,4/3 period), closer to the requested
+ * period on average */
+ ta->period = (uint64_t) period * 4 / 3;
+ ta->clock_type = clock_type;
+ ta->current = 0;
+
+ window_reset(&ta->windows[0]);
+ window_reset(&ta->windows[1]);
+
+ /* Both windows are offsetted by half a period */
+ ta->windows[0].expiration = now + ta->period / 2;
+ ta->windows[1].expiration = now + ta->period;
+}
+
+/* Check if the time windows have expired, updating their counters and
+ * expiration time if that's the case.
+ *
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) within the current
+ * window will be stored here
+ */
+static void check_expirations(TimedAverage *ta, uint64_t *elapsed)
+{
+ int64_t now = qemu_clock_get_ns(ta->clock_type);
+ int i;
+
+ assert(ta->period != 0);
+
+ /* Check if the windows have expired */
+ for (i = 0; i < 2; i++) {
+ TimedAverageWindow *w = &ta->windows[i];
+ if (w->expiration <= now) {
+ window_reset(w);
+ update_expiration(w, now, ta->period);
+ }
+ }
+
+ /* Make ta->current point to the oldest window */
+ if (ta->windows[0].expiration < ta->windows[1].expiration) {
+ ta->current = 0;
+ } else {
+ ta->current = 1;
+ }
+
+ /* Calculate the elapsed time within the current window */
+ if (elapsed) {
+ int64_t remaining = ta->windows[ta->current].expiration - now;
+ *elapsed = ta->period - remaining;
+ }
+}
+
+/* Account a value
+ *
+ * @ta: the TimedAverage structure
+ * @value: the value to account
+ */
+void timed_average_account(TimedAverage *ta, uint64_t value)
+{
+ int i;
+ check_expirations(ta, NULL);
+
+ /* Do the accounting in both windows at the same time */
+ for (i = 0; i < 2; i++) {
+ TimedAverageWindow *w = &ta->windows[i];
+
+ w->sum += value;
+ w->count++;
+
+ if (value < w->min) {
+ w->min = value;
+ }
+
+ if (value > w->max) {
+ w->max = value;
+ }
+ }
+}
+
+/* Get the minimum value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the minimum value
+ */
+uint64_t timed_average_min(TimedAverage *ta)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, NULL);
+ w = current_window(ta);
+ return w->min < UINT64_MAX ? w->min : 0;
+}
+
+/* Get the average value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the average value
+ */
+uint64_t timed_average_avg(TimedAverage *ta)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, NULL);
+ w = current_window(ta);
+ return w->count > 0 ? w->sum / w->count : 0;
+}
+
+/* Get the maximum value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the maximum value
+ */
+uint64_t timed_average_max(TimedAverage *ta)
+{
+ check_expirations(ta, NULL);
+ return current_window(ta)->max;
+}
+
+/* Get the sum of all accounted values
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
+ * @ret: the sum of all accounted values
+ */
+uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, elapsed);
+ w = current_window(ta);
+ return w->sum;
+}