From 437fd90c0250dee670290f9b714253671a990160 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Wed, 18 May 2016 13:18:31 +0300 Subject: These changes are the raw update to qemu-2.6. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collission happened in the following patches: migration: do cleanup operation after completion(738df5b9) Bug fix.(1750c932f86) kvmclock: add a new function to update env->tsc.(b52baab2) The code provided by the patches was already in the upstreamed version. Change-Id: I3cc11841a6a76ae20887b2e245710199e1ea7f9a Signed-off-by: José Pekkarinen --- qemu/util/Makefile.objs | 20 +- qemu/util/acl.c | 1 + qemu/util/base64.c | 60 ++++ qemu/util/bitmap.c | 3 +- qemu/util/bitops.c | 1 + qemu/util/buffer.c | 172 ++++++++++++ qemu/util/compatfd.c | 1 + qemu/util/coroutine-gthread.c | 199 ++++++++++++++ qemu/util/coroutine-sigaltstack.c | 290 ++++++++++++++++++++ qemu/util/coroutine-ucontext.c | 192 +++++++++++++ qemu/util/coroutine-win32.c | 102 +++++++ qemu/util/crc32c.c | 1 + qemu/util/cutils.c | 363 ++++++++++++++++++++---- qemu/util/envlist.c | 1 + qemu/util/error.c | 216 ++++++++++----- qemu/util/event_notifier-posix.c | 14 +- qemu/util/event_notifier-win32.c | 2 + qemu/util/fifo8.c | 1 + qemu/util/getauxval.c | 2 +- qemu/util/hbitmap.c | 4 +- qemu/util/hexdump.c | 34 ++- qemu/util/host-utils.c | 3 +- qemu/util/id.c | 39 +++ qemu/util/iov.c | 11 +- qemu/util/log.c | 313 +++++++++++++++++++++ qemu/util/memfd.c | 162 +++++++++++ qemu/util/mmap-alloc.c | 110 ++++++++ qemu/util/module.c | 2 +- qemu/util/notify.c | 1 + qemu/util/osdep.c | 29 +- qemu/util/oslib-posix.c | 133 +++++---- qemu/util/oslib-win32.c | 298 +++++++++++++++++++- qemu/util/path.c | 9 +- qemu/util/qemu-config.c | 10 +- qemu/util/qemu-coroutine-io.c | 90 ++++++ qemu/util/qemu-coroutine-lock.c | 187 +++++++++++++ qemu/util/qemu-coroutine-sleep.c | 42 +++ qemu/util/qemu-coroutine.c | 147 ++++++++++ qemu/util/qemu-error.c | 12 +- qemu/util/qemu-openpty.c | 2 +- qemu/util/qemu-option.c | 57 +++- qemu/util/qemu-progress.c | 6 +- qemu/util/qemu-sockets.c | 560 +++++++++++++++++++++++--------------- qemu/util/qemu-thread-posix.c | 24 +- qemu/util/qemu-thread-win32.c | 69 ++++- qemu/util/qemu-timer-common.c | 1 + qemu/util/rcu.c | 54 ++-- qemu/util/readline.c | 2 + qemu/util/rfifolock.c | 2 +- qemu/util/throttle.c | 137 +++++++--- qemu/util/timed-average.c | 231 ++++++++++++++++ qemu/util/unicode.c | 3 +- qemu/util/uri.c | 3 +- 53 files changed, 3899 insertions(+), 529 deletions(-) create mode 100644 qemu/util/base64.c create mode 100644 qemu/util/buffer.c create mode 100644 qemu/util/coroutine-gthread.c create mode 100644 qemu/util/coroutine-sigaltstack.c create mode 100644 qemu/util/coroutine-ucontext.c create mode 100644 qemu/util/coroutine-win32.c create mode 100644 qemu/util/log.c create mode 100644 qemu/util/memfd.c create mode 100644 qemu/util/mmap-alloc.c create mode 100644 qemu/util/qemu-coroutine-io.c create mode 100644 qemu/util/qemu-coroutine-lock.c create mode 100644 qemu/util/qemu-coroutine-sleep.c create mode 100644 qemu/util/qemu-coroutine.c create mode 100644 qemu/util/timed-average.c (limited to 'qemu/util') diff --git a/qemu/util/Makefile.objs b/qemu/util/Makefile.objs index 114d6578c..a8a777ec4 100644 --- a/qemu/util/Makefile.objs +++ b/qemu/util/Makefile.objs @@ -1,13 +1,20 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o -util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o -util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o +util-obj-$(CONFIG_POSIX) += compatfd.o +util-obj-$(CONFIG_POSIX) += event_notifier-posix.o +util-obj-$(CONFIG_POSIX) += mmap-alloc.o +util-obj-$(CONFIG_POSIX) += oslib-posix.o +util-obj-$(CONFIG_POSIX) += qemu-openpty.o +util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o +util-obj-$(CONFIG_WIN32) += event_notifier-win32.o +util-obj-$(CONFIG_POSIX) += memfd.o +util-obj-$(CONFIG_WIN32) += oslib-win32.o +util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o util-obj-y += envlist.o path.o module.o util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o util-obj-y += bitmap.o bitops.o hbitmap.o util-obj-y += fifo8.o util-obj-y += acl.o util-obj-y += error.o qemu-error.o -util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-y += id.o util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o util-obj-y += qemu-option.o qemu-progress.o @@ -18,3 +25,10 @@ util-obj-y += getauxval.o util-obj-y += readline.o util-obj-y += rfifolock.o util-obj-y += rcu.o +util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o +util-obj-y += qemu-coroutine-sleep.o +util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o +util-obj-y += buffer.o +util-obj-y += timed-average.o +util-obj-y += base64.o +util-obj-y += log.o diff --git a/qemu/util/acl.c b/qemu/util/acl.c index 571d68615..723b6a89b 100644 --- a/qemu/util/acl.c +++ b/qemu/util/acl.c @@ -23,6 +23,7 @@ */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/acl.h" diff --git a/qemu/util/base64.c b/qemu/util/base64.c new file mode 100644 index 000000000..9d3c46cbc --- /dev/null +++ b/qemu/util/base64.c @@ -0,0 +1,60 @@ +/* + * QEMU base64 helpers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/base64.h" + +static const char *base64_valid_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n"; + +uint8_t *qbase64_decode(const char *input, + size_t in_len, + size_t *out_len, + Error **errp) +{ + *out_len = 0; + + if (in_len != -1) { + /* Lack of NUL terminator is an error */ + if (input[in_len] != '\0') { + error_setg(errp, "Base64 data is not NUL terminated"); + return NULL; + } + /* Check there's no NULs embedded since we expect + * this to be valid base64 data */ + if (memchr(input, '\0', in_len) != NULL) { + error_setg(errp, "Base64 data contains embedded NUL characters"); + return NULL; + } + + /* Now we know its a valid nul terminated string + * strspn is safe to use... */ + } else { + in_len = strlen(input); + } + + if (strspn(input, base64_valid_chars) != in_len) { + error_setg(errp, "Base64 data contains invalid characters"); + return NULL; + } + + return g_base64_decode(input, out_len); +} diff --git a/qemu/util/bitmap.c b/qemu/util/bitmap.c index 300a68e38..40aadfb4f 100644 --- a/qemu/util/bitmap.c +++ b/qemu/util/bitmap.c @@ -9,12 +9,13 @@ * Version 2. */ +#include "qemu/osdep.h" #include "qemu/bitops.h" #include "qemu/bitmap.h" #include "qemu/atomic.h" /* - * bitmaps provide an array of bits, implemented using an an + * bitmaps provide an array of bits, implemented using an * array of unsigned longs. The number of valid bits in a * given bitmap does _not_ need to be an exact multiple of * BITS_PER_LONG. diff --git a/qemu/util/bitops.c b/qemu/util/bitops.c index 227c38b88..b0c35dd5f 100644 --- a/qemu/util/bitops.c +++ b/qemu/util/bitops.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include "qemu/osdep.h" #include "qemu/bitops.h" #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) diff --git a/qemu/util/buffer.c b/qemu/util/buffer.c new file mode 100644 index 000000000..a6118bf5b --- /dev/null +++ b/qemu/util/buffer.c @@ -0,0 +1,172 @@ +/* + * QEMU generic buffers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/buffer.h" +#include "trace.h" + +#define BUFFER_MIN_INIT_SIZE 4096 +#define BUFFER_MIN_SHRINK_SIZE 65536 + +/* define the factor alpha for the expentional smoothing + * that is used in the average size calculation. a shift + * of 7 results in an alpha of 1/2^7. */ +#define BUFFER_AVG_SIZE_SHIFT 7 + +static size_t buffer_req_size(Buffer *buffer, size_t len) +{ + return MAX(BUFFER_MIN_INIT_SIZE, + pow2ceil(buffer->offset + len)); +} + +static void buffer_adj_size(Buffer *buffer, size_t len) +{ + size_t old = buffer->capacity; + buffer->capacity = buffer_req_size(buffer, len); + buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + trace_buffer_resize(buffer->name ?: "unnamed", + old, buffer->capacity); + + /* make it even harder for the buffer to shrink, reset average size + * to currenty capacity if it is larger than the average. */ + buffer->avg_size = MAX(buffer->avg_size, + buffer->capacity << BUFFER_AVG_SIZE_SHIFT); +} + +void buffer_init(Buffer *buffer, const char *name, ...) +{ + va_list ap; + + va_start(ap, name); + buffer->name = g_strdup_vprintf(name, ap); + va_end(ap); +} + +static uint64_t buffer_get_avg_size(Buffer *buffer) +{ + return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT; +} + +void buffer_shrink(Buffer *buffer) +{ + size_t new; + + /* Calculate the average size of the buffer as + * avg_size = avg_size * ( 1 - a ) + required_size * a + * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */ + buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1; + buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT; + buffer->avg_size += buffer_req_size(buffer, 0); + + /* And then only shrink if the average size of the buffer is much + * too big, to avoid bumping up & down the buffers all the time. + * realloc() isn't exactly cheap ... */ + new = buffer_req_size(buffer, buffer_get_avg_size(buffer)); + if (new < buffer->capacity >> 3 && + new >= BUFFER_MIN_SHRINK_SIZE) { + buffer_adj_size(buffer, buffer_get_avg_size(buffer)); + } + + buffer_adj_size(buffer, 0); +} + +void buffer_reserve(Buffer *buffer, size_t len) +{ + if ((buffer->capacity - buffer->offset) < len) { + buffer_adj_size(buffer, len); + } +} + +gboolean buffer_empty(Buffer *buffer) +{ + return buffer->offset == 0; +} + +uint8_t *buffer_end(Buffer *buffer) +{ + return buffer->buffer + buffer->offset; +} + +void buffer_reset(Buffer *buffer) +{ + buffer->offset = 0; + buffer_shrink(buffer); +} + +void buffer_free(Buffer *buffer) +{ + trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity); + g_free(buffer->buffer); + g_free(buffer->name); + buffer->offset = 0; + buffer->capacity = 0; + buffer->buffer = NULL; + buffer->name = NULL; +} + +void buffer_append(Buffer *buffer, const void *data, size_t len) +{ + memcpy(buffer->buffer + buffer->offset, data, len); + buffer->offset += len; +} + +void buffer_advance(Buffer *buffer, size_t len) +{ + memmove(buffer->buffer, buffer->buffer + len, + (buffer->offset - len)); + buffer->offset -= len; + buffer_shrink(buffer); +} + +void buffer_move_empty(Buffer *to, Buffer *from) +{ + trace_buffer_move_empty(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + assert(to->offset == 0); + + g_free(to->buffer); + to->offset = from->offset; + to->capacity = from->capacity; + to->buffer = from->buffer; + + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} + +void buffer_move(Buffer *to, Buffer *from) +{ + if (to->offset == 0) { + buffer_move_empty(to, from); + return; + } + + trace_buffer_move(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + buffer_reserve(to, from->offset); + buffer_append(to, from->buffer, from->offset); + + g_free(from->buffer); + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} diff --git a/qemu/util/compatfd.c b/qemu/util/compatfd.c index e8571502b..9a43042ae 100644 --- a/qemu/util/compatfd.c +++ b/qemu/util/compatfd.c @@ -13,6 +13,7 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/compatfd.h" #include "qemu/thread.h" diff --git a/qemu/util/coroutine-gthread.c b/qemu/util/coroutine-gthread.c new file mode 100644 index 000000000..fb697eb0b --- /dev/null +++ b/qemu/util/coroutine-gthread.c @@ -0,0 +1,199 @@ +/* + * GThread coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori + * Copyright (C) 2011 Aneesh Kumar K.V + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct { + Coroutine base; + GThread *thread; + bool runnable; + bool free_on_thread_exit; + CoroutineAction action; +} CoroutineGThread; + +static CompatGMutex coroutine_lock; +static CompatGCond coroutine_cond; + +/* GLib 2.31 and beyond deprecated various parts of the thread API, + * but the new interfaces are not available in older GLib versions + * so we have to cope with both. + */ +#if GLIB_CHECK_VERSION(2, 31, 0) +/* Awkwardly, the GPrivate API doesn't provide a way to update the + * GDestroyNotify handler for the coroutine key dynamically. So instead + * we track whether or not the CoroutineGThread should be freed on + * thread exit / coroutine key update using the free_on_thread_exit + * field. + */ +static void coroutine_destroy_notify(gpointer data) +{ + CoroutineGThread *co = data; + if (co && co->free_on_thread_exit) { + g_free(co); + } +} + +static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify); + +static inline CoroutineGThread *get_coroutine_key(void) +{ + return g_private_get(&coroutine_key); +} + +static inline void set_coroutine_key(CoroutineGThread *co, + bool free_on_thread_exit) +{ + /* Unlike g_static_private_set() this does not call the GDestroyNotify + * if the previous value of the key was NULL. Fortunately we only need + * the GDestroyNotify in the non-NULL key case. + */ + co->free_on_thread_exit = free_on_thread_exit; + g_private_replace(&coroutine_key, co); +} + +static inline GThread *create_thread(GThreadFunc func, gpointer data) +{ + return g_thread_new("coroutine", func, data); +} + +#else + +/* Handle older GLib versions */ + +static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT; + +static inline CoroutineGThread *get_coroutine_key(void) +{ + return g_static_private_get(&coroutine_key); +} + +static inline void set_coroutine_key(CoroutineGThread *co, + bool free_on_thread_exit) +{ + g_static_private_set(&coroutine_key, co, + free_on_thread_exit ? (GDestroyNotify)g_free : NULL); +} + +static inline GThread *create_thread(GThreadFunc func, gpointer data) +{ + return g_thread_create_full(func, data, 0, TRUE, TRUE, + G_THREAD_PRIORITY_NORMAL, NULL); +} + +#endif + + +static void __attribute__((constructor)) coroutine_init(void) +{ +#if !GLIB_CHECK_VERSION(2, 31, 0) + if (!g_thread_supported()) { + g_thread_init(NULL); + } +#endif +} + +static void coroutine_wait_runnable_locked(CoroutineGThread *co) +{ + while (!co->runnable) { + g_cond_wait(&coroutine_cond, &coroutine_lock); + } +} + +static void coroutine_wait_runnable(CoroutineGThread *co) +{ + g_mutex_lock(&coroutine_lock); + coroutine_wait_runnable_locked(co); + g_mutex_unlock(&coroutine_lock); +} + +static gpointer coroutine_thread(gpointer opaque) +{ + CoroutineGThread *co = opaque; + + set_coroutine_key(co, false); + coroutine_wait_runnable(co); + co->base.entry(co->base.entry_arg); + qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE); + return NULL; +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineGThread *co; + + co = g_malloc0(sizeof(*co)); + co->thread = create_thread(coroutine_thread, co); + if (!co->thread) { + g_free(co); + return NULL; + } + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_); + + g_thread_join(co->thread); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, + Coroutine *to_, + CoroutineAction action) +{ + CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_); + CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_); + + g_mutex_lock(&coroutine_lock); + from->runnable = false; + from->action = action; + to->runnable = true; + to->action = action; + g_cond_broadcast(&coroutine_cond); + + if (action != COROUTINE_TERMINATE) { + coroutine_wait_runnable_locked(from); + } + g_mutex_unlock(&coroutine_lock); + return from->action; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineGThread *co = get_coroutine_key(); + if (!co) { + co = g_malloc0(sizeof(*co)); + co->runnable = true; + set_coroutine_key(co, true); + } + + return &co->base; +} + +bool qemu_in_coroutine(void) +{ + CoroutineGThread *co = get_coroutine_key(); + + return co && co->base.caller; +} diff --git a/qemu/util/coroutine-sigaltstack.c b/qemu/util/coroutine-sigaltstack.c new file mode 100644 index 000000000..a7c336655 --- /dev/null +++ b/qemu/util/coroutine-sigaltstack.c @@ -0,0 +1,290 @@ +/* + * sigaltstack coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori + * Copyright (C) 2011 Kevin Wolf + * Copyright (C) 2012 Alex Barcelo +** This file is partly based on pth_mctx.c, from the GNU Portable Threads +** Copyright (c) 1999-2006 Ralf S. Engelschall + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include "qemu/osdep.h" +#include +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct { + Coroutine base; + void *stack; + sigjmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { + /** Currently executing coroutine */ + Coroutine *current; + + /** The default coroutine */ + CoroutineUContext leader; + + /** Information for the signal handler (trampoline) */ + sigjmp_buf tr_reenter; + volatile sig_atomic_t tr_called; + void *tr_handler; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + if (!s) { + s = g_malloc0(sizeof(*s)); + s->current = &s->leader.base; + pthread_setspecific(thread_state_key, s); + } + return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ + CoroutineThreadState *s = opaque; + + g_free(s); +} + +static void __attribute__((constructor)) coroutine_init(void) +{ + int ret; + + ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); + if (ret != 0) { + fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); + abort(); + } +} + +/* "boot" function + * This is what starts the coroutine, is called from the trampoline + * (from the signal handler when it is not signal handling, read ahead + * for more information). + */ +static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +{ + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +/* + * This is used as the signal handler. This is called with the brand new stack + * (thanks to sigaltstack). We have to return, given that this is a signal + * handler and the sigmask and some other things are changed. + */ +static void coroutine_trampoline(int signal) +{ + CoroutineUContext *self; + Coroutine *co; + CoroutineThreadState *coTS; + + /* Get the thread specific information */ + coTS = coroutine_get_thread_state(); + self = coTS->tr_handler; + coTS->tr_called = 1; + co = &self->base; + + /* + * Here we have to do a bit of a ping pong between the caller, given that + * this is a signal handler and we have to do a return "soon". Then the + * caller can reestablish everything and do a siglongjmp here again. + */ + if (!sigsetjmp(coTS->tr_reenter, 0)) { + return; + } + + /* + * Ok, the caller has siglongjmp'ed back to us, so now prepare + * us for the real machine state switching. We have to jump + * into another function here to get a new stack context for + * the auto variables (which have to be auto-variables + * because the start of the thread happens later). Else with + * PIC (i.e. Position Independent Code which is used when PTH + * is built as a shared library) most platforms would + * horrible core dump as experience showed. + */ + coroutine_bootstrap(self, co); +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + CoroutineThreadState *coTS; + struct sigaction sa; + struct sigaction osa; + stack_t ss; + stack_t oss; + sigset_t sigs; + sigset_t osigs; + sigjmp_buf old_env; + + /* The way to manipulate stack is with the sigaltstack function. We + * prepare a stack, with it delivering a signal to ourselves and then + * put sigsetjmp/siglongjmp where needed. + * This has been done keeping coroutine-ucontext as a model and with the + * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics + * of the coroutines and see pth_mctx.c (from the pth project) for the + * sigaltstack way of manipulating stacks. + */ + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + coTS = coroutine_get_thread_state(); + coTS->tr_handler = co; + + /* + * Preserve the SIGUSR2 signal state, block SIGUSR2, + * and establish our signal handler. The signal will + * later transfer control onto the signal stack. + */ + sigemptyset(&sigs); + sigaddset(&sigs, SIGUSR2); + pthread_sigmask(SIG_BLOCK, &sigs, &osigs); + sa.sa_handler = coroutine_trampoline; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK; + if (sigaction(SIGUSR2, &sa, &osa) != 0) { + abort(); + } + + /* + * Set the new stack. + */ + ss.ss_sp = co->stack; + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, &oss) < 0) { + abort(); + } + + /* + * Now transfer control onto the signal stack and set it up. + * It will return immediately via "return" after the sigsetjmp() + * was performed. Be careful here with race conditions. The + * signal can be delivered the first time sigsuspend() is + * called. + */ + coTS->tr_called = 0; + pthread_kill(pthread_self(), SIGUSR2); + sigfillset(&sigs); + sigdelset(&sigs, SIGUSR2); + while (!coTS->tr_called) { + sigsuspend(&sigs); + } + + /* + * Inform the system that we are back off the signal stack by + * removing the alternative signal stack. Be careful here: It + * first has to be disabled, before it can be removed. + */ + sigaltstack(NULL, &ss); + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) < 0) { + abort(); + } + sigaltstack(NULL, &ss); + if (!(oss.ss_flags & SS_DISABLE)) { + sigaltstack(&oss, NULL); + } + + /* + * Restore the old SIGUSR2 signal handler and mask + */ + sigaction(SIGUSR2, &osa, NULL); + pthread_sigmask(SIG_SETMASK, &osigs, NULL); + + /* + * Now enter the trampoline again, but this time not as a signal + * handler. Instead we jump into it directly. The functionally + * redundant ping-pong pointer arithmetic is necessary to avoid + * type-conversion warnings related to the `volatile' qualifier and + * the fact that `jmp_buf' usually is an array type. + */ + if (!sigsetjmp(old_env, 0)) { + siglongjmp(coTS->tr_reenter, 1); + } + + /* + * Ok, we returned again, so now we're finished + */ + + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + + g_free(co->stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineThreadState *s = coroutine_get_thread_state(); + int ret; + + s->current = to_; + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { + siglongjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + + return s->current; +} + +bool qemu_in_coroutine(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + return s && s->current->caller; +} + diff --git a/qemu/util/coroutine-ucontext.c b/qemu/util/coroutine-ucontext.c new file mode 100644 index 000000000..2bb7e10d4 --- /dev/null +++ b/qemu/util/coroutine-ucontext.c @@ -0,0 +1,192 @@ +/* + * ucontext coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori + * Copyright (C) 2011 Kevin Wolf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include "qemu/osdep.h" +#include +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +#ifdef CONFIG_VALGRIND_H +#include +#endif + +typedef struct { + Coroutine base; + void *stack; + sigjmp_buf env; + +#ifdef CONFIG_VALGRIND_H + unsigned int valgrind_stack_id; +#endif + +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +static __thread CoroutineUContext leader; +static __thread Coroutine *current; + +/* + * va_args to makecontext() must be type 'int', so passing + * the pointer we need may require several int args. This + * union is a quick hack to let us do that + */ +union cc_arg { + void *p; + int i[2]; +}; + +static void coroutine_trampoline(int i0, int i1) +{ + union cc_arg arg; + CoroutineUContext *self; + Coroutine *co; + + arg.i[0] = i0; + arg.i[1] = i1; + self = arg.p; + co = &self->base; + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + ucontext_t old_uc, uc; + sigjmp_buf old_env; + union cc_arg arg = {0}; + + /* The ucontext functions preserve signal masks which incurs a + * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not + * preserve signal masks but only works on the current stack. + * Since we need a way to create and switch to a new stack, use + * the ucontext functions for that but sigsetjmp()/siglongjmp() for + * everything else. + */ + + if (getcontext(&uc) == -1) { + abort(); + } + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + uc.uc_link = &old_uc; + uc.uc_stack.ss_sp = co->stack; + uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_flags = 0; + +#ifdef CONFIG_VALGRIND_H + co->valgrind_stack_id = + VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size); +#endif + + arg.p = co; + + makecontext(&uc, (void (*)(void))coroutine_trampoline, + 2, arg.i[0], arg.i[1]); + + /* swapcontext() in, siglongjmp() back out */ + if (!sigsetjmp(old_env, 0)) { + swapcontext(&old_uc, &uc); + } + return &co->base; +} + +#ifdef CONFIG_VALGRIND_H +#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE +/* Work around an unused variable in the valgrind.h macro... */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +static inline void valgrind_stack_deregister(CoroutineUContext *co) +{ + VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id); +} +#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE +#pragma GCC diagnostic pop +#endif +#endif + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + +#ifdef CONFIG_VALGRIND_H + valgrind_stack_deregister(co); +#endif + + g_free(co->stack); + g_free(co); +} + +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the sigsetjmp() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + int ret; + + current = to_; + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { + siglongjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} diff --git a/qemu/util/coroutine-win32.c b/qemu/util/coroutine-win32.c new file mode 100644 index 000000000..02e28e825 --- /dev/null +++ b/qemu/util/coroutine-win32.c @@ -0,0 +1,102 @@ +/* + * Win32 coroutine initialization code + * + * Copyright (c) 2011 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct +{ + Coroutine base; + + LPVOID fiber; + CoroutineAction action; +} CoroutineWin32; + +static __thread CoroutineWin32 leader; +static __thread Coroutine *current; + +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the SwitchToFiber() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + + current = to_; + + to->action = action; + SwitchToFiber(to->fiber); + return from->action; +} + +static void CALLBACK coroutine_trampoline(void *co_) +{ + Coroutine *co = co_; + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineWin32 *co; + + co = g_malloc0(sizeof(*co)); + co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base); + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_); + + DeleteFiber(co->fiber); + g_free(co); +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + leader.fiber = ConvertThreadToFiber(NULL); + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} diff --git a/qemu/util/crc32c.c b/qemu/util/crc32c.c index 886632780..7e99555c1 100644 --- a/qemu/util/crc32c.c +++ b/qemu/util/crc32c.c @@ -25,6 +25,7 @@ * */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/crc32c.h" diff --git a/qemu/util/cutils.c b/qemu/util/cutils.c index 5d1c9ebe0..43d1afbbe 100644 --- a/qemu/util/cutils.c +++ b/qemu/util/cutils.c @@ -21,15 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/host-utils.h" #include -#include -#include #include "qemu/sockets.h" #include "qemu/iov.h" #include "net/net.h" +#include "qemu/cutils.h" void strpadcpy(char *buf, int buf_size, const char *str, char pad) { @@ -145,11 +145,6 @@ time_t mktimegm(struct tm *tm) return t; } -int qemu_fls(int i) -{ - return 32 - clz32(i); -} - /* * Make sure data goes on disk, but if possible do not bother to * write out the inode just for timestamp updates. @@ -166,6 +161,46 @@ int qemu_fdatasync(int fd) #endif } +/* vector definitions */ +#ifdef __ALTIVEC__ +#include +/* The altivec.h header says we're allowed to undef these for + * C++ compatibility. Here we don't care about C++, but we + * undef them anyway to avoid namespace pollution. + */ +#undef vector +#undef pixel +#undef bool +#define VECTYPE __vector unsigned char +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) +#define VEC_OR(v1, v2) ((v1) | (v2)) +/* altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. */ +#define bool _Bool +#elif defined __SSE2__ +#include +#define VECTYPE __m128i +#define SPLAT(p) _mm_set1_epi8(*(p)) +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) +#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) +#else +#define VECTYPE unsigned long +#define SPLAT(p) (*(p) * (~0UL / 255)) +#define ALL_EQ(v1, v2) ((v1) == (v2)) +#define VEC_OR(v1, v2) ((v1) | (v2)) +#endif + +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 + +static bool +can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); +} + /* * Searches for an area with non-zero content in a buffer * @@ -174,8 +209,8 @@ int qemu_fdatasync(int fd) * and addr must be a multiple of sizeof(VECTYPE) due to * restriction of optimizations in this function. * - * can_use_buffer_find_nonzero_offset() can be used to check - * these requirements. + * can_use_buffer_find_nonzero_offset_inner() can be used to + * check these requirements. * * The return value is the offset of the non-zero area rounded * down to a multiple of sizeof(VECTYPE) for the first @@ -186,13 +221,13 @@ int qemu_fdatasync(int fd) * If the buffer is all zero the return value is equal to len. */ -size_t buffer_find_nonzero_offset(const void *buf, size_t len) +static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) { const VECTYPE *p = buf; const VECTYPE zero = (VECTYPE){0}; size_t i; - assert(can_use_buffer_find_nonzero_offset(buf, len)); + assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); if (!len) { return 0; @@ -221,6 +256,114 @@ size_t buffer_find_nonzero_offset(const void *buf, size_t len) return i * sizeof(VECTYPE); } +/* + * GCC before version 4.9 has a bug which will cause the target + * attribute work incorrectly and failed to compile in some case, + * restrict the gcc version to 4.9+ to prevent the failure. + */ + +#if defined CONFIG_AVX2_OPT && QEMU_GNUC_PREREQ(4, 9) +#pragma GCC push_options +#pragma GCC target("avx2") +#include +#include + +#define AVX2_VECTYPE __m256i +#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) +#define AVX2_ALL_EQ(v1, v2) \ + (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) +#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) + +static bool +can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(AVX2_VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); +} + +static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + const AVX2_VECTYPE *p = buf; + const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!AVX2_ALL_EQ(p[i], zero)) { + return i * sizeof(AVX2_VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(AVX2_VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); + AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); + AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); + AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); + AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); + AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); + if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { + break; + } + } + + return i * sizeof(AVX2_VECTYPE); +} + +static bool avx2_support(void) +{ + int a, b, c, d; + + if (__get_cpuid_max(0, NULL) < 7) { + return false; + } + + __cpuid_count(7, 0, a, b, c, d); + + return b & bit_AVX2; +} + +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); +size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); + +static void *buffer_find_nonzero_offset_ifunc(void) +{ + typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? + buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; + + return func; +} + +static void *can_use_buffer_find_nonzero_offset_ifunc(void) +{ + typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? + can_use_buffer_find_nonzero_offset_avx2 : + can_use_buffer_find_nonzero_offset_inner; + + return func; +} +#pragma GCC pop_options +#else +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return can_use_buffer_find_nonzero_offset_inner(buf, len); +} + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return buffer_find_nonzero_offset_inner(buf, len); +} +#endif + /* * Checks if a buffer is all zeroes * @@ -281,19 +424,19 @@ int fcntl_setfl(int fd, int flag) static int64_t suffix_mul(char suffix, int64_t unit) { switch (qemu_toupper(suffix)) { - case STRTOSZ_DEFSUFFIX_B: + case QEMU_STRTOSZ_DEFSUFFIX_B: return 1; - case STRTOSZ_DEFSUFFIX_KB: + case QEMU_STRTOSZ_DEFSUFFIX_KB: return unit; - case STRTOSZ_DEFSUFFIX_MB: + case QEMU_STRTOSZ_DEFSUFFIX_MB: return unit * unit; - case STRTOSZ_DEFSUFFIX_GB: + case QEMU_STRTOSZ_DEFSUFFIX_GB: return unit * unit * unit; - case STRTOSZ_DEFSUFFIX_TB: + case QEMU_STRTOSZ_DEFSUFFIX_TB: return unit * unit * unit * unit; - case STRTOSZ_DEFSUFFIX_PB: + case QEMU_STRTOSZ_DEFSUFFIX_PB: return unit * unit * unit * unit * unit; - case STRTOSZ_DEFSUFFIX_EB: + case QEMU_STRTOSZ_DEFSUFFIX_EB: return unit * unit * unit * unit * unit * unit; } return -1; @@ -305,7 +448,7 @@ static int64_t suffix_mul(char suffix, int64_t unit) * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on * other error. */ -int64_t strtosz_suffix_unit(const char *nptr, char **end, +int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, const char default_suffix, int64_t unit) { int64_t retval = -EINVAL; @@ -348,14 +491,165 @@ fail: return retval; } -int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix) +int64_t qemu_strtosz_suffix(const char *nptr, char **end, + const char default_suffix) { - return strtosz_suffix_unit(nptr, end, default_suffix, 1024); + return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024); } -int64_t strtosz(const char *nptr, char **end) +int64_t qemu_strtosz(const char *nptr, char **end) { - return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB); + return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB); +} + +/** + * Helper function for qemu_strto*l() functions. + */ +static int check_strtox_error(const char *p, char *endptr, const char **next, + int err) +{ + /* If no conversion was performed, prefer BSD behavior over glibc + * behavior. + */ + if (err == 0 && endptr == p) { + err = EINVAL; + } + if (!next && *endptr) { + return -EINVAL; + } + if (next) { + *next = endptr; + } + return -err; +} + +/** + * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions. + * + * Convert ASCII string @nptr to a long integer value + * from the given @base. Parameters @nptr, @endptr, @base + * follows same semantics as strtol() C function. + * + * Unlike from strtol() function, if @endptr is not NULL, this + * function will return -EINVAL whenever it cannot fully convert + * the string in @nptr with given @base to a long. This function returns + * the result of the conversion only through the @result parameter. + * + * If NULL is passed in @endptr, then the whole string in @ntpr + * is a number otherwise it returns -EINVAL. + * + * RETURN VALUE + * Unlike from strtol() function, this wrapper returns either + * -EINVAL or the errno set by strtol() function (e.g -ERANGE). + * If the conversion overflows, -ERANGE is returned, and @result + * is set to the max value of the desired type + * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case + * of underflow, -ERANGE is returned, and @result is set to the min + * value of the desired type. For strtol(), strtoll(), @result is set to + * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it + * is set to 0. + */ +int qemu_strtol(const char *nptr, const char **endptr, int base, + long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtol(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long integer. + * + * If string contains a negative number, value will be converted to + * the unsigned representation of the signed value, unless the original + * (nonnegated) value would overflow, in this case, it will set @result + * to ULONG_MAX, and return ERANGE. + * + * The same behavior holds, for qemu_strtoull() but sets @result to + * ULLONG_MAX instead of ULONG_MAX. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoul(const char *nptr, const char **endptr, int base, + unsigned long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoul(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to a long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoll(const char *nptr, const char **endptr, int base, + int64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoll(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoull(const char *nptr, const char **endptr, int base, + uint64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoull(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; } /** @@ -474,29 +768,6 @@ int qemu_parse_fd(const char *param) return fd; } -/* round down to the nearest power of 2*/ -int64_t pow2floor(int64_t value) -{ - if (!is_power_of_2(value)) { - value = 0x8000000000000000ULL >> clz64(value); - } - return value; -} - -/* round up to the nearest power of 2 (0 if overflow) */ -uint64_t pow2ceil(uint64_t value) -{ - uint8_t nlz = clz64(value); - - if (is_power_of_2(value)) { - return value; - } - if (!nlz) { - return 0; - } - return 1ULL << (64 - nlz); -} - /* * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) * Input is limited to 14-bit numbers diff --git a/qemu/util/envlist.c b/qemu/util/envlist.c index 099a544a4..e86857e70 100644 --- a/qemu/util/envlist.c +++ b/qemu/util/envlist.c @@ -1,3 +1,4 @@ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/queue.h" #include "qemu/envlist.h" diff --git a/qemu/util/error.c b/qemu/util/error.c index 14f435187..cae251173 100644 --- a/qemu/util/error.c +++ b/qemu/util/error.c @@ -2,30 +2,53 @@ * QEMU Error Objects * * Copyright IBM, Corp. 2011 + * Copyright (C) 2011-2015 Red Hat, Inc. * * Authors: * Anthony Liguori + * Markus Armbruster , * * This work is licensed under the terms of the GNU LGPL, version 2. See * the COPYING.LIB file in the top-level directory. */ -#include "qemu-common.h" +#include "qemu/osdep.h" #include "qapi/error.h" +#include "qemu-common.h" #include "qemu/error-report.h" struct Error { char *msg; ErrorClass err_class; + const char *src, *func; + int line; + GString *hint; }; Error *error_abort; +Error *error_fatal; + +static void error_handle_fatal(Error **errp, Error *err) +{ + if (errp == &error_abort) { + fprintf(stderr, "Unexpected error in %s() at %s:%d:\n", + err->func, err->src, err->line); + error_report_err(err); + abort(); + } + if (errp == &error_fatal) { + error_report_err(err); + exit(1); + } +} -void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...) +static void error_setv(Error **errp, + const char *src, int line, const char *func, + ErrorClass err_class, const char *fmt, va_list ap, + const char *suffix) { Error *err; - va_list ap; int saved_errno = errno; if (errp == NULL) { @@ -34,99 +57,140 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...) assert(*errp == NULL); err = g_malloc0(sizeof(*err)); - - va_start(ap, fmt); err->msg = g_strdup_vprintf(fmt, ap); - va_end(ap); - err->err_class = err_class; - - if (errp == &error_abort) { - error_report_err(err); - abort(); + if (suffix) { + char *msg = err->msg; + err->msg = g_strdup_printf("%s: %s", msg, suffix); + g_free(msg); } + err->err_class = err_class; + err->src = src; + err->line = line; + err->func = func; + error_handle_fatal(errp, err); *errp = err; errno = saved_errno; } -void error_set_errno(Error **errp, int os_errno, ErrorClass err_class, - const char *fmt, ...) +void error_set_internal(Error **errp, + const char *src, int line, const char *func, + ErrorClass err_class, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_setv(errp, src, line, func, err_class, fmt, ap, NULL); + va_end(ap); +} + +void error_setg_internal(Error **errp, + const char *src, int line, const char *func, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap, NULL); + va_end(ap); +} + +void error_setg_errno_internal(Error **errp, + const char *src, int line, const char *func, + int os_errno, const char *fmt, ...) { - Error *err; - char *msg1; va_list ap; int saved_errno = errno; if (errp == NULL) { return; } - assert(*errp == NULL); - - err = g_malloc0(sizeof(*err)); va_start(ap, fmt); - msg1 = g_strdup_vprintf(fmt, ap); - if (os_errno != 0) { - err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno)); - g_free(msg1); - } else { - err->msg = msg1; - } + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap, + os_errno != 0 ? strerror(os_errno) : NULL); va_end(ap); - err->err_class = err_class; - if (errp == &error_abort) { - error_report_err(err); - abort(); + errno = saved_errno; +} + +void error_setg_file_open_internal(Error **errp, + const char *src, int line, const char *func, + int os_errno, const char *filename) +{ + error_setg_errno_internal(errp, src, line, func, os_errno, + "Could not open '%s'", filename); +} + +void error_vprepend(Error **errp, const char *fmt, va_list ap) +{ + GString *newmsg; + + if (!errp) { + return; } - *errp = err; + newmsg = g_string_new(NULL); + g_string_vprintf(newmsg, fmt, ap); + g_string_append(newmsg, (*errp)->msg); + (*errp)->msg = g_string_free(newmsg, 0); +} - errno = saved_errno; +void error_prepend(Error **errp, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_vprepend(errp, fmt, ap); + va_end(ap); } -void error_setg_file_open(Error **errp, int os_errno, const char *filename) +void error_append_hint(Error **errp, const char *fmt, ...) { - error_setg_errno(errp, os_errno, "Could not open '%s'", filename); + va_list ap; + int saved_errno = errno; + Error *err; + + if (!errp) { + return; + } + err = *errp; + assert(err && errp != &error_abort && errp != &error_fatal); + + if (!err->hint) { + err->hint = g_string_new(NULL); + } + va_start(ap, fmt); + g_string_append_vprintf(err->hint, fmt, ap); + va_end(ap); + + errno = saved_errno; } #ifdef _WIN32 -void error_set_win32(Error **errp, int win32_err, ErrorClass err_class, - const char *fmt, ...) +void error_setg_win32_internal(Error **errp, + const char *src, int line, const char *func, + int win32_err, const char *fmt, ...) { - Error *err; - char *msg1; va_list ap; + char *suffix = NULL; if (errp == NULL) { return; } - assert(*errp == NULL); - err = g_malloc0(sizeof(*err)); - - va_start(ap, fmt); - msg1 = g_strdup_vprintf(fmt, ap); if (win32_err != 0) { - char *msg2 = g_win32_error_message(win32_err); - err->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2, - (unsigned)win32_err); - g_free(msg2); - g_free(msg1); - } else { - err->msg = msg1; + suffix = g_win32_error_message(win32_err); } - va_end(ap); - err->err_class = err_class; - if (errp == &error_abort) { - error_report_err(err); - abort(); - } + va_start(ap, fmt); + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, + fmt, ap, suffix); + va_end(ap); - *errp = err; + g_free(suffix); } #endif @@ -138,6 +202,12 @@ Error *error_copy(const Error *err) err_new = g_malloc0(sizeof(*err)); err_new->msg = g_strdup(err->msg); err_new->err_class = err->err_class; + err_new->src = err->src; + err_new->line = err->line; + err_new->func = err->func; + if (err->hint) { + err_new->hint = g_string_new(err->hint->str); + } return err_new; } @@ -155,25 +225,49 @@ const char *error_get_pretty(Error *err) void error_report_err(Error *err) { error_report("%s", error_get_pretty(err)); + if (err->hint) { + error_printf_unless_qmp("%s", err->hint->str); + } error_free(err); } +void error_reportf_err(Error *err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_vprepend(&err, fmt, ap); + va_end(ap); + error_report_err(err); +} + void error_free(Error *err) { if (err) { g_free(err->msg); + if (err->hint) { + g_string_free(err->hint, true); + } g_free(err); } } +void error_free_or_abort(Error **errp) +{ + assert(errp && *errp); + error_free(*errp); + *errp = NULL; +} + void error_propagate(Error **dst_errp, Error *local_err) { - if (local_err && dst_errp == &error_abort) { - error_report_err(local_err); - abort(); - } else if (dst_errp && !*dst_errp) { + if (!local_err) { + return; + } + error_handle_fatal(dst_errp, local_err); + if (dst_errp && !*dst_errp) { *dst_errp = local_err; - } else if (local_err) { + } else { error_free(local_err); } } diff --git a/qemu/util/event_notifier-posix.c b/qemu/util/event_notifier-posix.c index ed4ca2b01..c1f0d79b3 100644 --- a/qemu/util/event_notifier-posix.c +++ b/qemu/util/event_notifier-posix.c @@ -10,7 +10,9 @@ * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" #include "qemu-common.h" +#include "qemu/cutils.h" #include "qemu/event_notifier.h" #include "sysemu/char.h" #include "qemu/main-loop.h" @@ -19,11 +21,17 @@ #include #endif +#ifdef CONFIG_EVENTFD +/* + * Initialize @e with existing file descriptor @fd. + * @fd must be a genuine eventfd object, emulation with pipe won't do. + */ void event_notifier_init_fd(EventNotifier *e, int fd) { e->rfd = fd; e->wfd = fd; } +#endif int event_notifier_init(EventNotifier *e, int active) { @@ -77,15 +85,17 @@ void event_notifier_cleanup(EventNotifier *e) close(e->wfd); } -int event_notifier_get_fd(EventNotifier *e) +int event_notifier_get_fd(const EventNotifier *e) { return e->rfd; } int event_notifier_set_handler(EventNotifier *e, + bool is_external, EventNotifierHandler *handler) { - qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e); + aio_set_fd_handler(iohandler_get_aio_context(), e->rfd, is_external, + (IOHandler *)handler, NULL, e); return 0; } diff --git a/qemu/util/event_notifier-win32.c b/qemu/util/event_notifier-win32.c index 6dbb530cf..de87df02d 100644 --- a/qemu/util/event_notifier-win32.c +++ b/qemu/util/event_notifier-win32.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/event_notifier.h" #include "qemu/main-loop.h" @@ -32,6 +33,7 @@ HANDLE event_notifier_get_handle(EventNotifier *e) } int event_notifier_set_handler(EventNotifier *e, + bool is_external, EventNotifierHandler *handler) { if (handler) { diff --git a/qemu/util/fifo8.c b/qemu/util/fifo8.c index 0ea5ad98e..5c64101b3 100644 --- a/qemu/util/fifo8.c +++ b/qemu/util/fifo8.c @@ -12,6 +12,7 @@ * with this program; if not, see . */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/fifo8.h" diff --git a/qemu/util/getauxval.c b/qemu/util/getauxval.c index 1732ace2b..0b3bae2dc 100644 --- a/qemu/util/getauxval.c +++ b/qemu/util/getauxval.c @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#include "qemu-common.h" #include "qemu/osdep.h" +#include "qemu-common.h" #ifdef CONFIG_GETAUXVAL /* Don't inline this in qemu/osdep.h, because pulling in for diff --git a/qemu/util/hbitmap.c b/qemu/util/hbitmap.c index 50b888fd6..b22b87d0a 100644 --- a/qemu/util/hbitmap.c +++ b/qemu/util/hbitmap.c @@ -9,10 +9,8 @@ * later. See the COPYING file in the top-level directory. */ -#include -#include -#include #include "qemu/osdep.h" +#include #include "qemu/hbitmap.h" #include "qemu/host-utils.h" #include "trace.h" diff --git a/qemu/util/hexdump.c b/qemu/util/hexdump.c index 969b3406c..f879ff0ad 100644 --- a/qemu/util/hexdump.c +++ b/qemu/util/hexdump.c @@ -13,25 +13,37 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include "qemu/osdep.h" #include "qemu-common.h" void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) { - unsigned int b; + unsigned int b, len, i, c; - for (b = 0; b < size; b++) { - if ((b % 16) == 0) { - fprintf(fp, "%s: %04x:", prefix, b); + for (b = 0; b < size; b += 16) { + len = size - b; + if (len > 16) { + len = 16; } - if ((b % 4) == 0) { - fprintf(fp, " "); + fprintf(fp, "%s: %04x:", prefix, b); + for (i = 0; i < 16; i++) { + if ((i % 4) == 0) { + fprintf(fp, " "); + } + if (i < len) { + fprintf(fp, " %02x", (unsigned char)buf[b + i]); + } else { + fprintf(fp, " "); + } } - fprintf(fp, " %02x", (unsigned char)buf[b]); - if ((b % 16) == 15) { - fprintf(fp, "\n"); + fprintf(fp, " "); + for (i = 0; i < len; i++) { + c = buf[b + i]; + if (c < ' ' || c > '~') { + c = '.'; + } + fprintf(fp, "%c", c); } - } - if ((b % 16) != 0) { fprintf(fp, "\n"); } } diff --git a/qemu/util/host-utils.c b/qemu/util/host-utils.c index 102e5bf30..b166e5758 100644 --- a/qemu/util/host-utils.c +++ b/qemu/util/host-utils.c @@ -23,8 +23,7 @@ * THE SOFTWARE. */ -#include -#include +#include "qemu/osdep.h" #include "qemu/host-utils.h" /* Long integer helpers */ diff --git a/qemu/util/id.c b/qemu/util/id.c index 09b22fb8f..614135295 100644 --- a/qemu/util/id.c +++ b/qemu/util/id.c @@ -10,7 +10,9 @@ * or later. See the COPYING.LIB file in the top-level directory. */ +#include "qemu/osdep.h" #include "qemu-common.h" +#include "qemu/id.h" bool id_wellformed(const char *id) { @@ -26,3 +28,40 @@ bool id_wellformed(const char *id) } return true; } + +#define ID_SPECIAL_CHAR '#' + +static const char *const id_subsys_str[ID_MAX] = { + [ID_QDEV] = "qdev", + [ID_BLOCK] = "block", +}; + +/* + * Generates an ID of the form PREFIX SUBSYSTEM NUMBER + * where: + * + * - PREFIX is the reserved character '#' + * - SUBSYSTEM identifies the subsystem creating the ID + * - NUMBER is a decimal number unique within SUBSYSTEM. + * + * Example: "#block146" + * + * Note that these IDs do not satisfy id_wellformed(). + * + * The caller is responsible for freeing the returned string with g_free() + */ +char *id_generate(IdSubSystems id) +{ + static uint64_t id_counters[ID_MAX]; + uint32_t rnd; + + assert(id < ARRAY_SIZE(id_subsys_str)); + assert(id_subsys_str[id]); + + rnd = g_random_int_range(0, 100); + + return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR, + id_subsys_str[id], + id_counters[id]++, + rnd); +} diff --git a/qemu/util/iov.c b/qemu/util/iov.c index a0d5934e8..003fcce66 100644 --- a/qemu/util/iov.c +++ b/qemu/util/iov.c @@ -16,11 +16,14 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include "qemu/osdep.h" +#include "qemu-common.h" #include "qemu/iov.h" #include "qemu/sockets.h" +#include "qemu/cutils.h" -size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, - size_t offset, const void *buf, size_t bytes) +size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) { size_t done; unsigned int i; @@ -38,8 +41,8 @@ size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, return done; } -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - size_t offset, void *buf, size_t bytes) +size_t iov_to_buf_full(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes) { size_t done; unsigned int i; diff --git a/qemu/util/log.c b/qemu/util/log.c new file mode 100644 index 000000000..1857730dc --- /dev/null +++ b/qemu/util/log.c @@ -0,0 +1,313 @@ +/* + * Logging support + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/log.h" +#include "qemu/range.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "trace/control.h" + +static char *logfilename; +FILE *qemu_logfile; +int qemu_loglevel; +static int log_append = 0; +static GArray *debug_regions; + +void qemu_log(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (qemu_logfile) { + vfprintf(qemu_logfile, fmt, ap); + } + va_end(ap); +} + +/* enable or disable low levels log */ +void do_qemu_set_log(int log_flags, bool use_own_buffers) +{ + qemu_loglevel = log_flags; +#ifdef CONFIG_TRACE_LOG + qemu_loglevel |= LOG_TRACE; +#endif + if (!qemu_logfile && + (is_daemonized() ? logfilename != NULL : qemu_loglevel)) { + if (logfilename) { + qemu_logfile = fopen(logfilename, log_append ? "a" : "w"); + if (!qemu_logfile) { + perror(logfilename); + _exit(1); + } + /* In case we are a daemon redirect stderr to logfile */ + if (is_daemonized()) { + dup2(fileno(qemu_logfile), STDERR_FILENO); + fclose(qemu_logfile); + /* This will skip closing logfile in qemu_log_close() */ + qemu_logfile = stderr; + } + } else { + /* Default to stderr if no log file specified */ + assert(!is_daemonized()); + qemu_logfile = stderr; + } + /* must avoid mmap() usage of glibc by setting a buffer "by hand" */ + if (use_own_buffers) { + static char logfile_buf[4096]; + + setvbuf(qemu_logfile, logfile_buf, _IOLBF, sizeof(logfile_buf)); + } else { +#if defined(_WIN32) + /* Win32 doesn't support line-buffering, so use unbuffered output. */ + setvbuf(qemu_logfile, NULL, _IONBF, 0); +#else + setvbuf(qemu_logfile, NULL, _IOLBF, 0); +#endif + log_append = 1; + } + } + if (qemu_logfile && + (is_daemonized() ? logfilename == NULL : !qemu_loglevel)) { + qemu_log_close(); + } +} +/* + * Allow the user to include %d in their logfile which will be + * substituted with the current PID. This is useful for debugging many + * nested linux-user tasks but will result in lots of logs. + */ +void qemu_set_log_filename(const char *filename) +{ + char *pidstr; + g_free(logfilename); + + pidstr = strstr(filename, "%"); + if (pidstr) { + /* We only accept one %d, no other format strings */ + if (pidstr[1] != 'd' || strchr(pidstr + 2, '%')) { + error_report("Bad logfile format: %s", filename); + logfilename = NULL; + } else { + logfilename = g_strdup_printf(filename, getpid()); + } + } else { + logfilename = g_strdup(filename); + } + qemu_log_close(); + qemu_set_log(qemu_loglevel); +} + +/* Returns true if addr is in our debug filter or no filter defined + */ +bool qemu_log_in_addr_range(uint64_t addr) +{ + if (debug_regions) { + int i = 0; + for (i = 0; i < debug_regions->len; i++) { + struct Range *range = &g_array_index(debug_regions, Range, i); + if (addr >= range->begin && addr <= range->end) { + return true; + } + } + return false; + } else { + return true; + } +} + + +void qemu_set_dfilter_ranges(const char *filter_spec) +{ + gchar **ranges = g_strsplit(filter_spec, ",", 0); + if (ranges) { + gchar **next = ranges; + gchar *r = *next++; + debug_regions = g_array_sized_new(FALSE, FALSE, + sizeof(Range), g_strv_length(ranges)); + while (r) { + char *range_op = strstr(r, "-"); + char *r2 = range_op ? range_op + 1 : NULL; + if (!range_op) { + range_op = strstr(r, "+"); + r2 = range_op ? range_op + 1 : NULL; + } + if (!range_op) { + range_op = strstr(r, ".."); + r2 = range_op ? range_op + 2 : NULL; + } + if (range_op) { + const char *e = NULL; + uint64_t r1val, r2val; + + if ((qemu_strtoull(r, &e, 0, &r1val) == 0) && + (qemu_strtoull(r2, NULL, 0, &r2val) == 0) && + r2val > 0) { + struct Range range; + + g_assert(e == range_op); + + switch (*range_op) { + case '+': + { + range.begin = r1val; + range.end = r1val + (r2val - 1); + break; + } + case '-': + { + range.end = r1val; + range.begin = r1val - (r2val - 1); + break; + } + case '.': + range.begin = r1val; + range.end = r2val; + break; + default: + g_assert_not_reached(); + } + g_array_append_val(debug_regions, range); + + } else { + g_error("Failed to parse range in: %s", r); + } + } else { + g_error("Bad range specifier in: %s", r); + } + r = *next++; + } + g_strfreev(ranges); + } +} + +/* fflush() the log file */ +void qemu_log_flush(void) +{ + fflush(qemu_logfile); +} + +/* Close the log file */ +void qemu_log_close(void) +{ + if (qemu_logfile) { + if (qemu_logfile != stderr) { + fclose(qemu_logfile); + } + qemu_logfile = NULL; + } +} + +const QEMULogItem qemu_log_items[] = { + { CPU_LOG_TB_OUT_ASM, "out_asm", + "show generated host assembly code for each compiled TB" }, + { CPU_LOG_TB_IN_ASM, "in_asm", + "show target assembly code for each compiled TB" }, + { CPU_LOG_TB_OP, "op", + "show micro ops for each compiled TB" }, + { CPU_LOG_TB_OP_OPT, "op_opt", + "show micro ops (x86 only: before eflags optimization) and\n" + "after liveness analysis" }, + { CPU_LOG_INT, "int", + "show interrupts/exceptions in short format" }, + { CPU_LOG_EXEC, "exec", + "show trace before each executed TB (lots of logs)" }, + { CPU_LOG_TB_CPU, "cpu", + "show CPU registers before entering a TB (lots of logs)" }, + { CPU_LOG_MMU, "mmu", + "log MMU-related activities" }, + { CPU_LOG_PCALL, "pcall", + "x86 only: show protected mode far calls/returns/exceptions" }, + { CPU_LOG_RESET, "cpu_reset", + "show CPU state before CPU resets" }, + { LOG_UNIMP, "unimp", + "log unimplemented functionality" }, + { LOG_GUEST_ERROR, "guest_errors", + "log when the guest OS does something invalid (eg accessing a\n" + "non-existent register)" }, + { CPU_LOG_PAGE, "page", + "dump pages at beginning of user mode emulation" }, + { CPU_LOG_TB_NOCHAIN, "nochain", + "do not chain compiled TBs so that \"exec\" and \"cpu\" show\n" + "complete traces" }, + { 0, NULL, NULL }, +}; + +static int cmp1(const char *s1, int n, const char *s2) +{ + if (strlen(s2) != n) { + return 0; + } + return memcmp(s1, s2, n) == 0; +} + +/* takes a comma separated list of log masks. Return 0 if error. */ +int qemu_str_to_log_mask(const char *str) +{ + const QEMULogItem *item; + int mask; + const char *p, *p1; + + p = str; + mask = 0; + for (;;) { + p1 = strchr(p, ','); + if (!p1) { + p1 = p + strlen(p); + } + if (cmp1(p,p1-p,"all")) { + for (item = qemu_log_items; item->mask != 0; item++) { + mask |= item->mask; + } +#ifdef CONFIG_TRACE_LOG + } else if (strncmp(p, "trace:", 6) == 0 && p + 6 != p1) { + trace_enable_events(p + 6); + mask |= LOG_TRACE; +#endif + } else { + for (item = qemu_log_items; item->mask != 0; item++) { + if (cmp1(p, p1 - p, item->name)) { + goto found; + } + } + return 0; + found: + mask |= item->mask; + } + if (*p1 != ',') { + break; + } + p = p1 + 1; + } + return mask; +} + +void qemu_print_log_usage(FILE *f) +{ + const QEMULogItem *item; + fprintf(f, "Log items (comma separated):\n"); + for (item = qemu_log_items; item->mask != 0; item++) { + fprintf(f, "%-15s %s\n", item->name, item->help); + } +#ifdef CONFIG_TRACE_LOG + fprintf(f, "trace:PATTERN enable trace events\n"); + fprintf(f, "\nUse \"-d trace:help\" to get a list of trace events.\n\n"); +#endif +} diff --git a/qemu/util/memfd.c b/qemu/util/memfd.c new file mode 100644 index 000000000..7c406914c --- /dev/null +++ b/qemu/util/memfd.c @@ -0,0 +1,162 @@ +/* + * memfd.c + * + * Copyright (c) 2015 Red Hat, Inc. + * + * QEMU library functions on POSIX which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include +#include + +#include + +#include "qemu/memfd.h" + +#ifdef CONFIG_MEMFD +#include +#elif defined CONFIG_LINUX +#include +#include + +static int memfd_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +/* + * This is a best-effort helper for shared memory allocation, with + * optional sealing. The helper will do his best to allocate using + * memfd with sealing, but may fallback on other methods without + * sealing. + */ +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, + int *fd) +{ + void *ptr; + int mfd = -1; + + *fd = -1; + +#ifdef CONFIG_LINUX + if (seals) { + mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + } + + if (mfd == -1) { + /* some systems have memfd without sealing */ + mfd = memfd_create(name, MFD_CLOEXEC); + seals = 0; + } +#endif + + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + + if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) { + perror("fcntl"); + close(mfd); + return NULL; + } + } else { + const char *tmpdir = g_get_tmp_dir(); + gchar *fname; + + fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir); + mfd = mkstemp(fname); + unlink(fname); + g_free(fname); + + if (mfd == -1) { + perror("mkstemp"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +void qemu_memfd_free(void *ptr, size_t size, int fd) +{ + if (ptr) { + munmap(ptr, size); + } + + if (fd != -1) { + close(fd); + } +} + +enum { + MEMFD_KO, + MEMFD_OK, + MEMFD_TODO +}; + +bool qemu_memfd_check(void) +{ + static int memfd_check = MEMFD_TODO; + + if (memfd_check == MEMFD_TODO) { + int fd; + void *ptr; + + ptr = qemu_memfd_alloc("test", 4096, 0, &fd); + memfd_check = ptr ? MEMFD_OK : MEMFD_KO; + qemu_memfd_free(ptr, 4096, fd); + } + + return memfd_check == MEMFD_OK; +} diff --git a/qemu/util/mmap-alloc.c b/qemu/util/mmap-alloc.c new file mode 100644 index 000000000..0b4cc7f7f --- /dev/null +++ b/qemu/util/mmap-alloc.c @@ -0,0 +1,110 @@ +/* + * Support for RAM backed by mmaped host memory. + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Authors: + * Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include + +#define HUGETLBFS_MAGIC 0x958458f6 + +#ifdef CONFIG_LINUX +#include +#endif + +size_t qemu_fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX + struct statfs fs; + int ret; + + if (fd != -1) { + do { + ret = fstatfs(fd, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { + return fs.f_bsize; + } + } +#endif + + return getpagesize(); +} + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +{ + /* + * Note: this always allocates at least one extra page of virtual address + * space, even if size is already aligned. + */ + size_t total = size + align; +#if defined(__powerpc64__) && defined(__linux__) + /* On ppc64 mappings in the same segment (aka slice) must share the same + * page size. Since we will be re-allocating part of this segment + * from the supplied fd, we should make sure to use the same page size, to + * this end we mmap the supplied fd. In this case, set MAP_NORESERVE to + * avoid allocating backing store memory. + * We do this unless we are using the system page size, in which case + * anonymous memory is OK. + */ + int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd; + int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE; + void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0); +#else + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; + + if (ptr == MAP_FAILED) { + return MAP_FAILED; + } + + /* Make sure align is a power of 2 */ + assert(!(align & (align - 1))); + /* Always align to host page size */ + assert(align >= getpagesize()); + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | + (fd == -1 ? MAP_ANONYMOUS : 0) | + (shared ? MAP_SHARED : MAP_PRIVATE), + fd, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return MAP_FAILED; + } + + ptr += offset; + total -= offset; + + if (offset > 0) { + munmap(ptr - offset, offset); + } + + /* + * Leave a single PROT_NONE page allocated after the RAM block, to serve as + * a guard page guarding against potential buffer overflows. + */ + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); + } + + return ptr; +} + +void qemu_ram_munmap(void *ptr, size_t size) +{ + if (ptr) { + /* Unmap both the RAM block and the guard page */ + munmap(ptr, size + getpagesize()); + } +} diff --git a/qemu/util/module.c b/qemu/util/module.c index 4bd4a94d8..ce058aef6 100644 --- a/qemu/util/module.c +++ b/qemu/util/module.c @@ -13,7 +13,7 @@ * GNU GPL, version 2 or (at your option) any later version. */ -#include +#include "qemu/osdep.h" #include "qemu-common.h" #ifdef CONFIG_MODULES #include diff --git a/qemu/util/notify.c b/qemu/util/notify.c index f215dfc21..06de63a83 100644 --- a/qemu/util/notify.c +++ b/qemu/util/notify.c @@ -13,6 +13,7 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/notify.h" diff --git a/qemu/util/osdep.c b/qemu/util/osdep.c index 0092bb61b..d56d07111 100644 --- a/qemu/util/osdep.c +++ b/qemu/util/osdep.c @@ -21,24 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include -#include -#include -#include -#include -#include -#include -#include +#include "qemu/osdep.h" /* Needed early for CONFIG_BSD etc. */ -#include "config-host.h" #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) #include #endif #ifdef CONFIG_SOLARIS -#include #include /* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for discussion about Solaris header problems */ @@ -46,13 +37,21 @@ extern int madvise(caddr_t, size_t, int); #endif #include "qemu-common.h" +#include "qemu/cutils.h" #include "qemu/sockets.h" #include "qemu/error-report.h" #include "monitor/monitor.h" static bool fips_enabled = false; -static const char *qemu_version = QEMU_VERSION; +/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default + * instead of QEMU_VERSION, so setting hw_version on MachineClass + * is no longer mandatory. + * + * Do NOT change this string, or it will break compatibility on all + * machine classes that don't set hw_version. + */ +static const char *hw_version = "2.5+"; int socket_set_cork(int fd, int v) { @@ -311,14 +310,14 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen) return ret; } -void qemu_set_version(const char *version) +void qemu_set_hw_version(const char *version) { - qemu_version = version; + hw_version = version; } -const char *qemu_get_version(void) +const char *qemu_hw_version(void) { - return qemu_version; + return hw_version; } void fips_set_state(bool requested) diff --git a/qemu/util/oslib-posix.c b/qemu/util/oslib-posix.c index 3ae4987b6..6cc4b8f00 100644 --- a/qemu/util/oslib-posix.c +++ b/qemu/util/oslib-posix.c @@ -26,16 +26,8 @@ * THE SOFTWARE. */ -/* The following block of code temporarily renames the daemon() function so the - compiler does not see the warning associated with it in stdlib.h on OSX */ -#ifdef __APPLE__ -#define daemon qemu_fake_daemon_function -#include -#undef daemon -extern int daemon(int, int); -#endif - -#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__)) +#if defined(__linux__) && \ + (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)) /* Use 2 MiB alignment so transparent hugepages can be used by KVM. Valgrind does not support alignments larger than 1 MiB, therefore we need special code which handles running on Valgrind. */ @@ -46,32 +38,32 @@ extern int daemon(int, int); #else # define QEMU_VMALLOC_ALIGN getpagesize() #endif -#define HUGETLBFS_MAGIC 0x958458f6 +#include "qemu/osdep.h" #include -#include #include #include -#include "config-host.h" #include "sysemu/sysemu.h" #include "trace.h" +#include "qapi/error.h" #include "qemu/sockets.h" #include #include -#include #include +#include "qemu/cutils.h" #ifdef CONFIG_LINUX #include -#include #endif #ifdef __FreeBSD__ #include #endif +#include + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -128,10 +120,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; - size_t total = size + align - getpagesize(); - void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr = qemu_ram_mmap(-1, size, align, false); if (ptr == MAP_FAILED) { return NULL; @@ -140,15 +129,6 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) if (alignment) { *alignment = align; } - ptr += offset; - total -= offset; - - if (offset > 0) { - munmap(ptr - offset, offset); - } - if (total > size) { - munmap(ptr + size, total - size); - } trace_qemu_anon_ram_alloc(size, ptr); return ptr; @@ -163,9 +143,7 @@ void qemu_vfree(void *ptr) void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); - if (ptr) { - munmap(ptr, size); - } + qemu_ram_munmap(ptr, size); } void qemu_set_block(int fd) @@ -352,26 +330,6 @@ static void sigbus_handler(int signal) siglongjmp(sigjump, 1); } -static size_t fd_getpagesize(int fd) -{ -#ifdef CONFIG_LINUX - struct statfs fs; - int ret; - - if (fd != -1) { - do { - ret = fstatfs(fd, &fs); - } while (ret != 0 && errno == EINTR); - - if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { - return fs.f_bsize; - } - } -#endif - - return getpagesize(); -} - void os_mem_prealloc(int fd, char *area, size_t memory) { int ret; @@ -399,7 +357,7 @@ void os_mem_prealloc(int fd, char *area, size_t memory) exit(1); } else { int i; - size_t hpagesize = fd_getpagesize(fd); + size_t hpagesize = qemu_fd_getpagesize(fd); size_t numpages = DIV_ROUND_UP(memory, hpagesize); /* MAP_POPULATE silently ignores failures */ @@ -482,3 +440,74 @@ int qemu_read_password(char *buf, int buf_size) printf("\n"); return ret; } + + +pid_t qemu_fork(Error **errp) +{ + sigset_t oldmask, newmask; + struct sigaction sig_action; + int saved_errno; + pid_t pid; + + /* + * Need to block signals now, so that child process can safely + * kill off caller's signal handlers without a race. + */ + sigfillset(&newmask); + if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { + error_setg_errno(errp, errno, + "cannot block signals"); + return -1; + } + + pid = fork(); + saved_errno = errno; + + if (pid < 0) { + /* attempt to restore signal mask, but ignore failure, to + * avoid obscuring the fork failure */ + (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + error_setg_errno(errp, saved_errno, + "cannot fork child process"); + errno = saved_errno; + return -1; + } else if (pid) { + /* parent process */ + + /* Restore our original signal mask now that the child is + * safely running. Only documented failures are EFAULT (not + * possible, since we are using just-grabbed mask) or EINVAL + * (not possible, since we are using correct arguments). */ + (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + } else { + /* child process */ + size_t i; + + /* Clear out all signal handlers from parent so nothing + * unexpected can happen in our child once we unblock + * signals */ + sig_action.sa_handler = SIG_DFL; + sig_action.sa_flags = 0; + sigemptyset(&sig_action.sa_mask); + + for (i = 1; i < NSIG; i++) { + /* Only possible errors are EFAULT or EINVAL The former + * won't happen, the latter we expect, so no need to check + * return value */ + (void)sigaction(i, &sig_action, NULL); + } + + /* Unmask all signals in child, since we've no idea what the + * caller's done with their signal mask and don't want to + * propagate that to children */ + sigemptyset(&newmask); + if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { + Error *local_err = NULL; + error_setg_errno(&local_err, errno, + "cannot unblock signals"); + error_report_err(local_err); + _exit(1); + } + } + return pid; +} diff --git a/qemu/util/oslib-win32.c b/qemu/util/oslib-win32.c index 730a6707a..c926db4a5 100644 --- a/qemu/util/oslib-win32.c +++ b/qemu/util/oslib-win32.c @@ -2,7 +2,7 @@ * os-win32.c * * Copyright (c) 2003-2008 Fabrice Bellard - * Copyright (c) 2010 Red Hat, Inc. + * Copyright (c) 2010-2016 Red Hat, Inc. * * QEMU library functions for win32 which are shared between QEMU and * the QEMU tools. @@ -29,14 +29,15 @@ * this file are based on code from GNOME glib-2 and use a different license, * see the license comment there. */ +#include "qemu/osdep.h" #include #include -#include -#include "config-host.h" +#include "qapi/error.h" #include "sysemu/sysemu.h" #include "qemu/main-loop.h" #include "trace.h" #include "qemu/sockets.h" +#include "qemu/cutils.h" /* this must come after including "trace.h" */ #include @@ -95,6 +96,7 @@ void qemu_anon_ram_free(void *ptr, size_t size) } } +#ifndef CONFIG_LOCALTIME_R /* FIXME: add proper locking */ struct tm *gmtime_r(const time_t *timep, struct tm *result) { @@ -118,6 +120,7 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) } return p; } +#endif /* CONFIG_LOCALTIME_R */ void qemu_set_block(int fd) { @@ -143,6 +146,83 @@ int socket_set_fast_reuse(int fd) return 0; } + +static int socket_error(void) +{ + switch (WSAGetLastError()) { + case 0: + return 0; + case WSAEINTR: + return EINTR; + case WSAEINVAL: + return EINVAL; + case WSA_INVALID_HANDLE: + return EBADF; + case WSA_NOT_ENOUGH_MEMORY: + return ENOMEM; + case WSA_INVALID_PARAMETER: + return EINVAL; + case WSAENAMETOOLONG: + return ENAMETOOLONG; + case WSAENOTEMPTY: + return ENOTEMPTY; + case WSAEWOULDBLOCK: + /* not using EWOULDBLOCK as we don't want code to have + * to check both EWOULDBLOCK and EAGAIN */ + return EAGAIN; + case WSAEINPROGRESS: + return EINPROGRESS; + case WSAEALREADY: + return EALREADY; + case WSAENOTSOCK: + return ENOTSOCK; + case WSAEDESTADDRREQ: + return EDESTADDRREQ; + case WSAEMSGSIZE: + return EMSGSIZE; + case WSAEPROTOTYPE: + return EPROTOTYPE; + case WSAENOPROTOOPT: + return ENOPROTOOPT; + case WSAEPROTONOSUPPORT: + return EPROTONOSUPPORT; + case WSAEOPNOTSUPP: + return EOPNOTSUPP; + case WSAEAFNOSUPPORT: + return EAFNOSUPPORT; + case WSAEADDRINUSE: + return EADDRINUSE; + case WSAEADDRNOTAVAIL: + return EADDRNOTAVAIL; + case WSAENETDOWN: + return ENETDOWN; + case WSAENETUNREACH: + return ENETUNREACH; + case WSAENETRESET: + return ENETRESET; + case WSAECONNABORTED: + return ECONNABORTED; + case WSAECONNRESET: + return ECONNRESET; + case WSAENOBUFS: + return ENOBUFS; + case WSAEISCONN: + return EISCONN; + case WSAENOTCONN: + return ENOTCONN; + case WSAETIMEDOUT: + return ETIMEDOUT; + case WSAECONNREFUSED: + return ECONNREFUSED; + case WSAELOOP: + return ELOOP; + case WSAEHOSTUNREACH: + return EHOSTUNREACH; + default: + return EIO; + } +} + int inet_aton(const char *cp, struct in_addr *ia) { uint32_t addr = inet_addr(cp); @@ -452,7 +532,7 @@ gint g_poll(GPollFD *fds, guint nfds, gint timeout) return retval; } -size_t getpagesize(void) +int getpagesize(void) { SYSTEM_INFO system_info; @@ -494,3 +574,213 @@ int qemu_read_password(char *buf, int buf_size) buf[i] = '\0'; return 0; } + + +pid_t qemu_fork(Error **errp) +{ + errno = ENOSYS; + error_setg_errno(errp, errno, + "cannot fork child process"); + return -1; +} + + +#undef connect +int qemu_connect_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = connect(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef listen +int qemu_listen_wrap(int sockfd, int backlog) +{ + int ret; + ret = listen(sockfd, backlog); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef bind +int qemu_bind_wrap(int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + int ret; + ret = bind(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef socket +int qemu_socket_wrap(int domain, int type, int protocol) +{ + int ret; + ret = socket(domain, type, protocol); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef accept +int qemu_accept_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = accept(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef shutdown +int qemu_shutdown_wrap(int sockfd, int how) +{ + int ret; + ret = shutdown(sockfd, how); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef ioctlsocket +int qemu_ioctlsocket_wrap(int fd, int req, void *val) +{ + int ret; + ret = ioctlsocket(fd, req, val); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef closesocket +int qemu_closesocket_wrap(int fd) +{ + int ret; + ret = closesocket(fd); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockopt +int qemu_getsockopt_wrap(int sockfd, int level, int optname, + void *optval, socklen_t *optlen) +{ + int ret; + ret = getsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef setsockopt +int qemu_setsockopt_wrap(int sockfd, int level, int optname, + const void *optval, socklen_t optlen) +{ + int ret; + ret = setsockopt(sockfd, level, optname, optval, optlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getpeername +int qemu_getpeername_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getpeername(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef getsockname +int qemu_getsockname_wrap(int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + int ret; + ret = getsockname(sockfd, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef send +ssize_t qemu_send_wrap(int sockfd, const void *buf, size_t len, int flags) +{ + int ret; + ret = send(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef sendto +ssize_t qemu_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *addr, socklen_t addrlen) +{ + int ret; + ret = sendto(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recv +ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags) +{ + int ret; + ret = recv(sockfd, buf, len, flags); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} + + +#undef recvfrom +ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *addr, socklen_t *addrlen) +{ + int ret; + ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); + if (ret < 0) { + errno = socket_error(); + } + return ret; +} diff --git a/qemu/util/path.c b/qemu/util/path.c index 4e4877e82..5479f76c6 100644 --- a/qemu/util/path.c +++ b/qemu/util/path.c @@ -3,15 +3,12 @@ The assumption is that this area does not change. */ -#include +#include "qemu/osdep.h" #include #include -#include -#include -#include -#include -#include #include "qemu-common.h" +#include "qemu/cutils.h" +#include "qemu/path.h" struct pathelem { diff --git a/qemu/util/qemu-config.c b/qemu/util/qemu-config.c index 5fcfd0e6a..fb973074d 100644 --- a/qemu/util/qemu-config.c +++ b/qemu/util/qemu-config.c @@ -1,8 +1,8 @@ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/error-report.h" #include "qemu/option.h" #include "qemu/config-file.h" -#include "qapi/error.h" #include "qmp-commands.h" static QemuOptsList *vm_config_groups[48]; @@ -219,6 +219,14 @@ static QemuOptsList machine_opts = { .name = "suppress-vmdesc", .type = QEMU_OPT_BOOL, .help = "Set on to disable self-describing migration", + },{ + .name = "aes-key-wrap", + .type = QEMU_OPT_BOOL, + .help = "enable/disable AES key wrapping using the CPACF wrapping key", + },{ + .name = "dea-key-wrap", + .type = QEMU_OPT_BOOL, + .help = "enable/disable DEA key wrapping using the CPACF wrapping key", }, { /* End of list */ } } diff --git a/qemu/util/qemu-coroutine-io.c b/qemu/util/qemu-coroutine-io.c new file mode 100644 index 000000000..91b9357d4 --- /dev/null +++ b/qemu/util/qemu-coroutine-io.c @@ -0,0 +1,90 @@ +/* + * Coroutine-aware I/O functions + * + * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. + * Copyright (c) 2011, Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/sockets.h" +#include "qemu/coroutine.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + +ssize_t coroutine_fn +qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send) +{ + size_t done = 0; + ssize_t ret; + while (done < bytes) { + ret = iov_send_recv(sockfd, iov, iov_cnt, + offset + done, bytes - done, do_send); + if (ret > 0) { + done += ret; + } else if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + qemu_coroutine_yield(); + } else if (done == 0) { + return -errno; + } else { + break; + } + } else if (ret == 0 && !do_send) { + /* write (send) should never return 0. + * read (recv) returns 0 for end-of-file (-data). + * In both cases there's little point retrying, + * but we do for write anyway, just in case */ + break; + } + } + return done; +} + +ssize_t coroutine_fn +qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send) +{ + struct iovec iov = { .iov_base = buf, .iov_len = bytes }; + return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send); +} + +typedef struct { + Coroutine *co; + int fd; +} FDYieldUntilData; + +static void fd_coroutine_enter(void *opaque) +{ + FDYieldUntilData *data = opaque; + qemu_set_fd_handler(data->fd, NULL, NULL, NULL); + qemu_coroutine_enter(data->co, NULL); +} + +void coroutine_fn yield_until_fd_readable(int fd) +{ + FDYieldUntilData data; + + assert(qemu_in_coroutine()); + data.co = qemu_coroutine_self(); + data.fd = fd; + qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data); + qemu_coroutine_yield(); +} diff --git a/qemu/util/qemu-coroutine-lock.c b/qemu/util/qemu-coroutine-lock.c new file mode 100644 index 000000000..da37ca7f9 --- /dev/null +++ b/qemu/util/qemu-coroutine-lock.c @@ -0,0 +1,187 @@ +/* + * coroutine queues and locks + * + * Copyright (c) 2011 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" +#include "qemu/queue.h" +#include "trace.h" + +void qemu_co_queue_init(CoQueue *queue) +{ + QTAILQ_INIT(&queue->entries); +} + +void coroutine_fn qemu_co_queue_wait(CoQueue *queue) +{ + Coroutine *self = qemu_coroutine_self(); + QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next); + qemu_coroutine_yield(); + assert(qemu_in_coroutine()); +} + +/** + * qemu_co_queue_run_restart: + * + * Enter each coroutine that was previously marked for restart by + * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is + * invoked by the core coroutine code when the current coroutine yields or + * terminates. + */ +void qemu_co_queue_run_restart(Coroutine *co) +{ + Coroutine *next; + + trace_qemu_co_queue_run_restart(co); + while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) { + QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + } +} + +static bool qemu_co_queue_do_restart(CoQueue *queue, bool single) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *next; + + if (QTAILQ_EMPTY(&queue->entries)) { + return false; + } + + while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) { + QTAILQ_REMOVE(&queue->entries, next, co_queue_next); + QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next); + trace_qemu_co_queue_next(next); + if (single) { + break; + } + } + return true; +} + +bool coroutine_fn qemu_co_queue_next(CoQueue *queue) +{ + assert(qemu_in_coroutine()); + return qemu_co_queue_do_restart(queue, true); +} + +void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue) +{ + assert(qemu_in_coroutine()); + qemu_co_queue_do_restart(queue, false); +} + +bool qemu_co_enter_next(CoQueue *queue) +{ + Coroutine *next; + + next = QTAILQ_FIRST(&queue->entries); + if (!next) { + return false; + } + + QTAILQ_REMOVE(&queue->entries, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + return true; +} + +bool qemu_co_queue_empty(CoQueue *queue) +{ + return QTAILQ_FIRST(&queue->entries) == NULL; +} + +void qemu_co_mutex_init(CoMutex *mutex) +{ + memset(mutex, 0, sizeof(*mutex)); + qemu_co_queue_init(&mutex->queue); +} + +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_lock_entry(mutex, self); + + while (mutex->locked) { + qemu_co_queue_wait(&mutex->queue); + } + + mutex->locked = true; + + trace_qemu_co_mutex_lock_return(mutex, self); +} + +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_unlock_entry(mutex, self); + + assert(mutex->locked == true); + assert(qemu_in_coroutine()); + + mutex->locked = false; + qemu_co_queue_next(&mutex->queue); + + trace_qemu_co_mutex_unlock_return(mutex, self); +} + +void qemu_co_rwlock_init(CoRwlock *lock) +{ + memset(lock, 0, sizeof(*lock)); + qemu_co_queue_init(&lock->queue); +} + +void qemu_co_rwlock_rdlock(CoRwlock *lock) +{ + while (lock->writer) { + qemu_co_queue_wait(&lock->queue); + } + lock->reader++; +} + +void qemu_co_rwlock_unlock(CoRwlock *lock) +{ + assert(qemu_in_coroutine()); + if (lock->writer) { + lock->writer = false; + qemu_co_queue_restart_all(&lock->queue); + } else { + lock->reader--; + assert(lock->reader >= 0); + /* Wakeup only one waiting writer */ + if (!lock->reader) { + qemu_co_queue_next(&lock->queue); + } + } +} + +void qemu_co_rwlock_wrlock(CoRwlock *lock) +{ + while (lock->writer || lock->reader) { + qemu_co_queue_wait(&lock->queue); + } + lock->writer = true; +} diff --git a/qemu/util/qemu-coroutine-sleep.c b/qemu/util/qemu-coroutine-sleep.c new file mode 100644 index 000000000..6966831d3 --- /dev/null +++ b/qemu/util/qemu-coroutine-sleep.c @@ -0,0 +1,42 @@ +/* + * QEMU coroutine sleep + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/coroutine.h" +#include "qemu/timer.h" +#include "block/aio.h" + +typedef struct CoSleepCB { + QEMUTimer *ts; + Coroutine *co; +} CoSleepCB; + +static void co_sleep_cb(void *opaque) +{ + CoSleepCB *sleep_cb = opaque; + + qemu_coroutine_enter(sleep_cb->co, NULL); +} + +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns) +{ + CoSleepCB sleep_cb = { + .co = qemu_coroutine_self(), + }; + sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); + timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); + qemu_coroutine_yield(); + timer_del(sleep_cb.ts); + timer_free(sleep_cb.ts); +} diff --git a/qemu/util/qemu-coroutine.c b/qemu/util/qemu-coroutine.c new file mode 100644 index 000000000..5816702cc --- /dev/null +++ b/qemu/util/qemu-coroutine.c @@ -0,0 +1,147 @@ +/* + * QEMU coroutines + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi + * Kevin Wolf + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "qemu-common.h" +#include "qemu/thread.h" +#include "qemu/atomic.h" +#include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" + +enum { + POOL_BATCH_SIZE = 64, +}; + +/** Free list to speed up creation */ +static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int release_pool_size; +static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +static __thread unsigned int alloc_pool_size; +static __thread Notifier coroutine_pool_cleanup_notifier; + +static void coroutine_pool_cleanup(Notifier *n, void *value) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + qemu_coroutine_delete(co); + } +} + +Coroutine *qemu_coroutine_create(CoroutineEntry *entry) +{ + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { + co = QSLIST_FIRST(&alloc_pool); + if (!co) { + if (release_pool_size > POOL_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. + */ + alloc_pool_size = atomic_xchg(&release_pool_size, 0); + QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); + co = QSLIST_FIRST(&alloc_pool); + } + } + if (co) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + alloc_pool_size--; + } + } + + if (!co) { + co = qemu_coroutine_new(); + } + + co->entry = entry; + QTAILQ_INIT(&co->co_queue_wakeup); + return co; +} + +static void coroutine_delete(Coroutine *co) +{ + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { + if (release_pool_size < POOL_BATCH_SIZE * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + atomic_inc(&release_pool_size); + return; + } + if (alloc_pool_size < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; + } + } + + qemu_coroutine_delete(co); +} + +void qemu_coroutine_enter(Coroutine *co, void *opaque) +{ + Coroutine *self = qemu_coroutine_self(); + CoroutineAction ret; + + trace_qemu_coroutine_enter(self, co, opaque); + + if (co->caller) { + fprintf(stderr, "Co-routine re-entered recursively\n"); + abort(); + } + + co->caller = self; + co->entry_arg = opaque; + ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER); + + qemu_co_queue_run_restart(co); + + switch (ret) { + case COROUTINE_YIELD: + return; + case COROUTINE_TERMINATE: + trace_qemu_coroutine_terminate(co); + coroutine_delete(co); + return; + default: + abort(); + } +} + +void coroutine_fn qemu_coroutine_yield(void) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *to = self->caller; + + trace_qemu_coroutine_yield(self, to); + + if (!to) { + fprintf(stderr, "Co-routine is yielding to no one\n"); + abort(); + } + + self->caller = NULL; + qemu_coroutine_switch(self, to, COROUTINE_YIELD); +} diff --git a/qemu/util/qemu-error.c b/qemu/util/qemu-error.c index 77ea6c614..1ef35664a 100644 --- a/qemu/util/qemu-error.c +++ b/qemu/util/qemu-error.c @@ -10,7 +10,7 @@ * See the COPYING file in the top-level directory. */ -#include +#include "qemu/osdep.h" #include "monitor/monitor.h" #include "qemu/error-report.h" @@ -200,8 +200,8 @@ static void error_print_loc(void) bool enable_timestamp_msg; /* * Print an error message to current monitor if we have one, else to stderr. - * Format arguments like vsprintf(). The result should not contain - * newlines. + * Format arguments like vsprintf(). The resulting message should be + * a single phrase, with no newline or trailing punctuation. * Prepend the current location and append a newline. * It's wrong to call this in a QMP monitor. Use error_setg() there. */ @@ -210,7 +210,7 @@ void error_vreport(const char *fmt, va_list ap) GTimeVal tv; gchar *timestr; - if (enable_timestamp_msg) { + if (enable_timestamp_msg && !cur_mon) { g_get_current_time(&tv); timestr = g_time_val_to_iso8601(&tv); error_printf("%s ", timestr); @@ -224,8 +224,8 @@ void error_vreport(const char *fmt, va_list ap) /* * Print an error message to current monitor if we have one, else to stderr. - * Format arguments like sprintf(). The result should not contain - * newlines. + * Format arguments like sprintf(). The resulting message should be a + * single phrase, with no newline or trailing punctuation. * Prepend the current location and append a newline. * It's wrong to call this in a QMP monitor. Use error_setg() there. */ diff --git a/qemu/util/qemu-openpty.c b/qemu/util/qemu-openpty.c index 4c5321116..2e8b43bdf 100644 --- a/qemu/util/qemu-openpty.c +++ b/qemu/util/qemu-openpty.c @@ -32,7 +32,7 @@ * linked with -lutil. */ -#include "config-host.h" +#include "qemu/osdep.h" #include "qemu-common.h" #if defined(__GLIBC__) diff --git a/qemu/util/qemu-option.c b/qemu/util/qemu-option.c index efe9d279c..3467dc239 100644 --- a/qemu/util/qemu-option.c +++ b/qemu/util/qemu-option.c @@ -23,15 +23,17 @@ * THE SOFTWARE. */ -#include -#include +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/error-report.h" #include "qapi/qmp/types.h" -#include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/option_int.h" +#include "qemu/cutils.h" +#include "qemu/id.h" +#include "qemu/help_option.h" /* * Extracts the name of an option from the parameter string (p points at the @@ -180,6 +182,11 @@ void parse_option_size(const char *name, const char *value, if (value != NULL) { sizef = strtod(value, &postfix); + if (sizef < 0 || sizef > UINT64_MAX) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, + "a non-negative number below 2^64"); + return; + } switch (*postfix) { case 'T': sizef *= 1024; @@ -200,10 +207,8 @@ void parse_option_size(const char *name, const char *value, break; default: error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); -#if 0 /* conversion from qerror_report() to error_set() broke this: */ - error_printf_unless_qmp("You may use k, M, G or T suffixes for " + error_append_hint(errp, "You may use k, M, G or T suffixes for " "kilobytes, megabytes, gigabytes and terabytes.\n"); -#endif return; } } else { @@ -643,9 +648,8 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, if (!id_wellformed(id)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id", "an identifier"); -#if 0 /* conversion from qerror_report() to error_set() broke this: */ - error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n"); -#endif + error_append_hint(errp, "Identifiers consist of letters, digits, " + "'-', '.', '_', starting with a letter.\n"); return NULL; } opts = qemu_opts_find(list, id); @@ -730,14 +734,35 @@ void qemu_opts_del(QemuOpts *opts) g_free(opts); } -void qemu_opts_print(QemuOpts *opts, const char *sep) +/* print value, escaping any commas in value */ +static void escaped_print(const char *value) +{ + const char *ptr; + + for (ptr = value; *ptr; ++ptr) { + if (*ptr == ',') { + putchar(','); + } + putchar(*ptr); + } +} + +void qemu_opts_print(QemuOpts *opts, const char *separator) { QemuOpt *opt; QemuOptDesc *desc = opts->list->desc; + const char *sep = ""; + + if (opts->id) { + printf("id=%s", opts->id); /* passed id_wellformed -> no commas */ + sep = separator; + } if (desc[0].name == NULL) { QTAILQ_FOREACH(opt, &opts->head, next) { - printf("%s%s=\"%s\"", sep, opt->name, opt->str); + printf("%s%s=", sep, opt->name); + escaped_print(opt->str); + sep = separator; } return; } @@ -750,13 +775,15 @@ void qemu_opts_print(QemuOpts *opts, const char *sep) continue; } if (desc->type == QEMU_OPT_STRING) { - printf("%s%s='%s'", sep, desc->name, value); + printf("%s%s=", sep, desc->name); + escaped_print(value); } else if ((desc->type == QEMU_OPT_SIZE || desc->type == QEMU_OPT_NUMBER) && opt) { printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint); } else { printf("%s%s=%s", sep, desc->name, value); } + sep = separator; } } @@ -1081,19 +1108,19 @@ int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, { Location loc; QemuOpts *opts; - int rc; + int rc = 0; loc_push_none(&loc); QTAILQ_FOREACH(opts, &list->head, next) { loc_restore(&opts->loc); rc = func(opaque, opts, errp); if (rc) { - return rc; + break; } assert(!errp || !*errp); } loc_pop(&loc); - return 0; + return rc; } static size_t count_opts_list(QemuOptsList *list) diff --git a/qemu/util/qemu-progress.c b/qemu/util/qemu-progress.c index 4ee5cd07f..f74523376 100644 --- a/qemu/util/qemu-progress.c +++ b/qemu/util/qemu-progress.c @@ -22,9 +22,8 @@ * THE SOFTWARE. */ -#include "qemu-common.h" #include "qemu/osdep.h" -#include +#include "qemu-common.h" struct progress_state { float current; @@ -152,7 +151,8 @@ void qemu_progress_print(float delta, int max) state.current = current; if (current > (state.last_print + state.min_skip) || - (current == 100) || (current == 0)) { + current < (state.last_print - state.min_skip) || + current == 100 || current == 0) { state.last_print = state.current; state.print(); } diff --git a/qemu/util/qemu-sockets.c b/qemu/util/qemu-sockets.c index 2add83a0f..0d536911c 100644 --- a/qemu/util/qemu-sockets.c +++ b/qemu/util/qemu-sockets.c @@ -15,57 +15,25 @@ * Contributions after 2012-01-13 are licensed under the terms of the * GNU GPL, version 2 or (at your option) any later version. */ -#include -#include -#include -#include -#include -#include +#include "qemu/osdep.h" #include "monitor/monitor.h" +#include "qapi/error.h" #include "qemu/sockets.h" #include "qemu/main-loop.h" +#include "qapi/qmp-input-visitor.h" +#include "qapi/qmp-output-visitor.h" +#include "qapi-visit.h" +#include "qemu/cutils.h" #ifndef AI_ADDRCONFIG # define AI_ADDRCONFIG 0 #endif + #ifndef AI_V4MAPPED # define AI_V4MAPPED 0 #endif -/* used temporarily until all users are converted to QemuOpts */ -QemuOptsList socket_optslist = { - .name = "socket", - .head = QTAILQ_HEAD_INITIALIZER(socket_optslist.head), - .desc = { - { - .name = "path", - .type = QEMU_OPT_STRING, - },{ - .name = "host", - .type = QEMU_OPT_STRING, - },{ - .name = "port", - .type = QEMU_OPT_STRING, - },{ - .name = "localaddr", - .type = QEMU_OPT_STRING, - },{ - .name = "localport", - .type = QEMU_OPT_STRING, - },{ - .name = "to", - .type = QEMU_OPT_NUMBER, - },{ - .name = "ipv4", - .type = QEMU_OPT_BOOL, - },{ - .name = "ipv6", - .type = QEMU_OPT_BOOL, - }, - { /* end if list */ } - }, -}; static int inet_getport(struct addrinfo *e) { @@ -111,37 +79,86 @@ NetworkAddressFamily inet_netfamily(int family) return NETWORK_ADDRESS_FAMILY_UNKNOWN; } -int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp) +/* + * Matrix we're trying to apply + * + * ipv4 ipv6 family + * - - PF_UNSPEC + * - f PF_INET + * - t PF_INET6 + * f - PF_INET6 + * f f + * f t PF_INET6 + * t - PF_INET + * t f PF_INET + * t t PF_INET6 + * + * NB, this matrix is only about getting the neccessary results + * from getaddrinfo(). Some of the cases require further work + * after reading results from getaddrinfo in order to fully + * apply the logic the end user wants. eg with the last case + * ipv4=t + ipv6=t + PF_INET6, getaddrinfo alone can only + * guarantee the ipv6=t part of the request - we need more + * checks to provide ipv4=t part of the guarantee. This is + * outside scope of this method and not currently handled by + * callers at all. + */ +static int inet_ai_family_from_address(InetSocketAddress *addr, + Error **errp) +{ + if (addr->has_ipv6 && addr->has_ipv4 && + !addr->ipv6 && !addr->ipv4) { + error_setg(errp, "Cannot disable IPv4 and IPv6 at same time"); + return PF_UNSPEC; + } + if ((addr->has_ipv6 && addr->ipv6) || (addr->has_ipv4 && !addr->ipv4)) { + return PF_INET6; + } + if ((addr->has_ipv4 && addr->ipv4) || (addr->has_ipv6 && !addr->ipv6)) { + return PF_INET; + } + return PF_UNSPEC; +} + +static int inet_listen_saddr(InetSocketAddress *saddr, + int port_offset, + bool update_addr, + Error **errp) { struct addrinfo ai,*res,*e; - const char *addr; char port[33]; char uaddr[INET6_ADDRSTRLEN+1]; char uport[33]; - int slisten, rc, to, port_min, port_max, p; + int slisten, rc, port_min, port_max, p; + Error *err = NULL; memset(&ai,0, sizeof(ai)); ai.ai_flags = AI_PASSIVE; - ai.ai_family = PF_UNSPEC; + ai.ai_family = inet_ai_family_from_address(saddr, &err); ai.ai_socktype = SOCK_STREAM; - if ((qemu_opt_get(opts, "host") == NULL) || - (qemu_opt_get(opts, "port") == NULL)) { - error_setg(errp, "host and/or port not specified"); + if (err) { + error_propagate(errp, err); return -1; } - pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port")); - addr = qemu_opt_get(opts, "host"); - to = qemu_opt_get_number(opts, "to", 0); - if (qemu_opt_get_bool(opts, "ipv4", 0)) - ai.ai_family = PF_INET; - if (qemu_opt_get_bool(opts, "ipv6", 0)) - ai.ai_family = PF_INET6; + if (saddr->host == NULL) { + error_setg(errp, "host not specified"); + return -1; + } + if (saddr->port != NULL) { + pstrcpy(port, sizeof(port), saddr->port); + } else { + port[0] = '\0'; + } /* lookup */ if (port_offset) { unsigned long long baseport; + if (strlen(port) == 0) { + error_setg(errp, "port not specified"); + return -1; + } if (parse_uint_full(port, &baseport, 10) < 0) { error_setg(errp, "can't convert to a number: %s", port); return -1; @@ -153,10 +170,11 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp) } snprintf(port, sizeof(port), "%d", (int)baseport + port_offset); } - rc = getaddrinfo(strlen(addr) ? addr : NULL, port, &ai, &res); + rc = getaddrinfo(strlen(saddr->host) ? saddr->host : NULL, + strlen(port) ? port : NULL, &ai, &res); if (rc != 0) { - error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, - gai_strerror(rc)); + error_setg(errp, "address resolution failed for %s:%s: %s", + saddr->host, port, gai_strerror(rc)); return -1; } @@ -184,7 +202,7 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp) #endif port_min = inet_getport(e); - port_max = to ? to + port_offset : port_min; + port_max = saddr->has_to ? saddr->to + port_offset : port_min; for (p = port_min; p <= port_max; p++) { inet_setport(e, p); if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) { @@ -208,13 +226,15 @@ listen: freeaddrinfo(res); return -1; } - qemu_opt_set(opts, "host", uaddr, &error_abort); - qemu_opt_set_number(opts, "port", inet_getport(e) - port_offset, - &error_abort); - qemu_opt_set_bool(opts, "ipv6", e->ai_family == PF_INET6, - &error_abort); - qemu_opt_set_bool(opts, "ipv4", e->ai_family != PF_INET6, - &error_abort); + if (update_addr) { + g_free(saddr->host); + saddr->host = g_strdup(uaddr); + g_free(saddr->port); + saddr->port = g_strdup_printf("%d", + inet_getport(e) - port_offset); + saddr->has_ipv6 = saddr->ipv6 = e->ai_family == PF_INET6; + saddr->has_ipv4 = saddr->ipv4 = e->ai_family != PF_INET6; + } freeaddrinfo(res); return slisten; } @@ -251,7 +271,7 @@ static void wait_for_connect(void *opaque) do { rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize); - } while (rc == -1 && socket_error() == EINTR); + } while (rc == -1 && errno == EINTR); /* update rc to contain error */ if (!rc && val) { @@ -313,7 +333,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, do { rc = 0; if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -329,38 +349,50 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, return sock; } -static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp) +static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr, + Error **errp) { struct addrinfo ai, *res; int rc; - const char *addr; - const char *port; + Error *err = NULL; + static int useV4Mapped = 1; memset(&ai, 0, sizeof(ai)); - ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; - ai.ai_family = PF_UNSPEC; + ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG; + if (atomic_read(&useV4Mapped)) { + ai.ai_flags |= AI_V4MAPPED; + } + ai.ai_family = inet_ai_family_from_address(saddr, &err); ai.ai_socktype = SOCK_STREAM; - addr = qemu_opt_get(opts, "host"); - port = qemu_opt_get(opts, "port"); - if (addr == NULL || port == NULL) { - error_setg(errp, "host and/or port not specified"); + if (err) { + error_propagate(errp, err); return NULL; } - if (qemu_opt_get_bool(opts, "ipv4", 0)) { - ai.ai_family = PF_INET; - } - if (qemu_opt_get_bool(opts, "ipv6", 0)) { - ai.ai_family = PF_INET6; + if (saddr->host == NULL || saddr->port == NULL) { + error_setg(errp, "host and/or port not specified"); + return NULL; } /* lookup */ - rc = getaddrinfo(addr, port, &ai, &res); + rc = getaddrinfo(saddr->host, saddr->port, &ai, &res); + + /* At least FreeBSD and OS-X 10.6 declare AI_V4MAPPED but + * then don't implement it in their getaddrinfo(). Detect + * this and retry without the flag since that's preferrable + * to a fatal error + */ + if (rc == EAI_BADFLAGS && + (ai.ai_flags & AI_V4MAPPED)) { + atomic_set(&useV4Mapped, 0); + ai.ai_flags &= ~AI_V4MAPPED; + rc = getaddrinfo(saddr->host, saddr->port, &ai, &res); + } if (rc != 0) { - error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, - gai_strerror(rc)); + error_setg(errp, "address resolution failed for %s:%s: %s", + saddr->host, saddr->port, gai_strerror(rc)); return NULL; } return res; @@ -369,8 +401,7 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp) /** * Create a socket and connect it to an address. * - * @opts: QEMU options, recognized parameters strings "host" and "port", - * bools "ipv4" and "ipv6". + * @saddr: Inet socket address specification * @errp: set on error * @callback: callback function for non-blocking connect * @opaque: opaque for callback function @@ -381,8 +412,8 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp) * function succeeds, callback will be called when the connection * completes, with the file descriptor on success, or -1 on error. */ -int inet_connect_opts(QemuOpts *opts, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +static int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) { Error *local_err = NULL; struct addrinfo *res, *e; @@ -390,7 +421,7 @@ int inet_connect_opts(QemuOpts *opts, Error **errp, bool in_progress; ConnectState *connect_state = NULL; - res = inet_parse_connect_opts(opts, errp); + res = inet_parse_connect_saddr(saddr, errp); if (!res) { return -1; } @@ -429,38 +460,41 @@ int inet_connect_opts(QemuOpts *opts, Error **errp, return sock; } -int inet_dgram_opts(QemuOpts *opts, Error **errp) +static int inet_dgram_saddr(InetSocketAddress *sraddr, + InetSocketAddress *sladdr, + Error **errp) { struct addrinfo ai, *peer = NULL, *local = NULL; const char *addr; const char *port; int sock = -1, rc; + Error *err = NULL; /* lookup peer addr */ memset(&ai,0, sizeof(ai)); ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; - ai.ai_family = PF_UNSPEC; + ai.ai_family = inet_ai_family_from_address(sraddr, &err); ai.ai_socktype = SOCK_DGRAM; - addr = qemu_opt_get(opts, "host"); - port = qemu_opt_get(opts, "port"); + if (err) { + error_propagate(errp, err); + goto err; + } + + addr = sraddr->host; + port = sraddr->port; if (addr == NULL || strlen(addr) == 0) { addr = "localhost"; } if (port == NULL || strlen(port) == 0) { error_setg(errp, "remote port not specified"); - return -1; + goto err; } - if (qemu_opt_get_bool(opts, "ipv4", 0)) - ai.ai_family = PF_INET; - if (qemu_opt_get_bool(opts, "ipv6", 0)) - ai.ai_family = PF_INET6; - if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); - return -1; + goto err; } /* lookup local addr */ @@ -469,13 +503,19 @@ int inet_dgram_opts(QemuOpts *opts, Error **errp) ai.ai_family = peer->ai_family; ai.ai_socktype = SOCK_DGRAM; - addr = qemu_opt_get(opts, "localaddr"); - port = qemu_opt_get(opts, "localport"); - if (addr == NULL || strlen(addr) == 0) { + if (sladdr) { + addr = sladdr->host; + port = sladdr->port; + if (addr == NULL || strlen(addr) == 0) { + addr = NULL; + } + if (!port || strlen(port) == 0) { + port = "0"; + } + } else { addr = NULL; - } - if (!port || strlen(port) == 0) port = "0"; + } if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, @@ -584,51 +624,31 @@ fail: return NULL; } -static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr) -{ - bool ipv4 = addr->ipv4 || !addr->has_ipv4; - bool ipv6 = addr->ipv6 || !addr->has_ipv6; - - if (!ipv4 || !ipv6) { - qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort); - qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort); - } - if (addr->has_to) { - qemu_opt_set_number(opts, "to", addr->to, &error_abort); - } - qemu_opt_set(opts, "host", addr->host, &error_abort); - qemu_opt_set(opts, "port", addr->port, &error_abort); -} - int inet_listen(const char *str, char *ostr, int olen, int socktype, int port_offset, Error **errp) { - QemuOpts *opts; char *optstr; int sock = -1; InetSocketAddress *addr; addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - inet_addr_to_opts(opts, addr); - qapi_free_InetSocketAddress(addr); - sock = inet_listen_opts(opts, port_offset, errp); + sock = inet_listen_saddr(addr, port_offset, true, errp); if (sock != -1 && ostr) { optstr = strchr(str, ','); - if (qemu_opt_get_bool(opts, "ipv6", 0)) { + if (addr->ipv6) { snprintf(ostr, olen, "[%s]:%s%s", - qemu_opt_get(opts, "host"), - qemu_opt_get(opts, "port"), + addr->host, + addr->port, optstr ? optstr : ""); } else { snprintf(ostr, olen, "%s:%s%s", - qemu_opt_get(opts, "host"), - qemu_opt_get(opts, "port"), + addr->host, + addr->port, optstr ? optstr : ""); } } - qemu_opts_del(opts); + qapi_free_InetSocketAddress(addr); } return sock; } @@ -643,17 +663,13 @@ int inet_listen(const char *str, char *ostr, int olen, **/ int inet_connect(const char *str, Error **errp) { - QemuOpts *opts; int sock = -1; InetSocketAddress *addr; addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - inet_addr_to_opts(opts, addr); + sock = inet_connect_saddr(addr, errp, NULL, NULL); qapi_free_InetSocketAddress(addr); - sock = inet_connect_opts(opts, errp, NULL, NULL); - qemu_opts_del(opts); } return sock; } @@ -675,7 +691,6 @@ int inet_nonblocking_connect(const char *str, NonBlockingConnectHandler *callback, void *opaque, Error **errp) { - QemuOpts *opts; int sock = -1; InetSocketAddress *addr; @@ -683,21 +698,19 @@ int inet_nonblocking_connect(const char *str, addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - inet_addr_to_opts(opts, addr); + sock = inet_connect_saddr(addr, errp, callback, opaque); qapi_free_InetSocketAddress(addr); - sock = inet_connect_opts(opts, errp, callback, opaque); - qemu_opts_del(opts); } return sock; } #ifndef _WIN32 -int unix_listen_opts(QemuOpts *opts, Error **errp) +static int unix_listen_saddr(UnixSocketAddress *saddr, + bool update_addr, + Error **errp) { struct sockaddr_un un; - const char *path = qemu_opt_get(opts, "path"); int sock, fd; sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); @@ -708,8 +721,8 @@ int unix_listen_opts(QemuOpts *opts, Error **errp) memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - if (path && strlen(path)) { - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + if (saddr->path && strlen(saddr->path)) { + snprintf(un.sun_path, sizeof(un.sun_path), "%s", saddr->path); } else { const char *tmpdir = getenv("TMPDIR"); tmpdir = tmpdir ? tmpdir : "/tmp"; @@ -734,11 +747,13 @@ int unix_listen_opts(QemuOpts *opts, Error **errp) goto err; } close(fd); - qemu_opt_set(opts, "path", un.sun_path, &error_abort); + if (update_addr) { + g_free(saddr->path); + saddr->path = g_strdup(un.sun_path); + } } - if ((access(un.sun_path, F_OK) == 0) && - unlink(un.sun_path) < 0) { + if (unlink(un.sun_path) < 0 && errno != ENOENT) { error_setg_errno(errp, errno, "Failed to unlink socket %s", un.sun_path); goto err; @@ -759,15 +774,14 @@ err: return -1; } -int unix_connect_opts(QemuOpts *opts, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) { struct sockaddr_un un; - const char *path = qemu_opt_get(opts, "path"); ConnectState *connect_state = NULL; int sock, rc; - if (path == NULL) { + if (saddr->path == NULL) { error_setg(errp, "unix connect: no path specified"); return -1; } @@ -786,13 +800,13 @@ int unix_connect_opts(QemuOpts *opts, Error **errp, memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + snprintf(un.sun_path, sizeof(un.sun_path), "%s", saddr->path); /* connect to peer */ do { rc = 0; if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) { - rc = -socket_error(); + rc = -errno; } } while (rc == -EINTR); @@ -819,15 +833,17 @@ int unix_connect_opts(QemuOpts *opts, Error **errp, #else -int unix_listen_opts(QemuOpts *opts, Error **errp) +static int unix_listen_saddr(UnixSocketAddress *saddr, + bool update_addr, + Error **errp) { error_setg(errp, "unix sockets are not available on windows"); errno = ENOTSUP; return -1; } -int unix_connect_opts(QemuOpts *opts, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) { error_setg(errp, "unix sockets are not available on windows"); errno = ENOTSUP; @@ -838,11 +854,11 @@ int unix_connect_opts(QemuOpts *opts, Error **errp, /* compatibility wrapper */ int unix_listen(const char *str, char *ostr, int olen, Error **errp) { - QemuOpts *opts; char *path, *optstr; int sock, len; + UnixSocketAddress *saddr; - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + saddr = g_new0(UnixSocketAddress, 1); optstr = strchr(str, ','); if (optstr) { @@ -850,30 +866,29 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) if (len) { path = g_malloc(len+1); snprintf(path, len+1, "%.*s", len, str); - qemu_opt_set(opts, "path", path, &error_abort); - g_free(path); + saddr->path = path; } } else { - qemu_opt_set(opts, "path", str, &error_abort); + saddr->path = g_strdup(str); } - sock = unix_listen_opts(opts, errp); + sock = unix_listen_saddr(saddr, true, errp); if (sock != -1 && ostr) - snprintf(ostr, olen, "%s%s", qemu_opt_get(opts, "path"), optstr ? optstr : ""); - qemu_opts_del(opts); + snprintf(ostr, olen, "%s%s", saddr->path, optstr ? optstr : ""); + qapi_free_UnixSocketAddress(saddr); return sock; } int unix_connect(const char *path, Error **errp) { - QemuOpts *opts; + UnixSocketAddress *saddr; int sock; - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - qemu_opt_set(opts, "path", path, &error_abort); - sock = unix_connect_opts(opts, errp, NULL, NULL); - qemu_opts_del(opts); + saddr = g_new0(UnixSocketAddress, 1); + saddr->path = g_strdup(path); + sock = unix_connect_saddr(saddr, errp, NULL, NULL); + qapi_free_UnixSocketAddress(saddr); return sock; } @@ -882,15 +897,15 @@ int unix_nonblocking_connect(const char *path, NonBlockingConnectHandler *callback, void *opaque, Error **errp) { - QemuOpts *opts; + UnixSocketAddress *saddr; int sock = -1; g_assert(callback != NULL); - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - qemu_opt_set(opts, "path", path, &error_abort); - sock = unix_connect_opts(opts, errp, callback, opaque); - qemu_opts_del(opts); + saddr = g_new0(UnixSocketAddress, 1); + saddr->path = g_strdup(path); + sock = unix_connect_saddr(saddr, errp, callback, opaque); + qapi_free_UnixSocketAddress(saddr); return sock; } @@ -904,23 +919,23 @@ SocketAddress *socket_parse(const char *str, Error **errp) error_setg(errp, "invalid Unix socket address"); goto fail; } else { - addr->kind = SOCKET_ADDRESS_KIND_UNIX; - addr->q_unix = g_new(UnixSocketAddress, 1); - addr->q_unix->path = g_strdup(str + 5); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix.data = g_new(UnixSocketAddress, 1); + addr->u.q_unix.data->path = g_strdup(str + 5); } } else if (strstart(str, "fd:", NULL)) { if (str[3] == '\0') { error_setg(errp, "invalid file descriptor address"); goto fail; } else { - addr->kind = SOCKET_ADDRESS_KIND_FD; - addr->fd = g_new(String, 1); - addr->fd->str = g_strdup(str + 3); + addr->type = SOCKET_ADDRESS_KIND_FD; + addr->u.fd.data = g_new(String, 1); + addr->u.fd.data->str = g_strdup(str + 3); } } else { - addr->kind = SOCKET_ADDRESS_KIND_INET; - addr->inet = inet_parse(str, errp); - if (addr->inet == NULL) { + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet.data = inet_parse(str, errp); + if (addr->u.inet.data == NULL) { goto fail; } } @@ -934,23 +949,19 @@ fail: int socket_connect(SocketAddress *addr, Error **errp, NonBlockingConnectHandler *callback, void *opaque) { - QemuOpts *opts; int fd; - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (addr->kind) { + switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, addr->inet); - fd = inet_connect_opts(opts, errp, callback, opaque); + fd = inet_connect_saddr(addr->u.inet.data, errp, callback, opaque); break; case SOCKET_ADDRESS_KIND_UNIX: - qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); - fd = unix_connect_opts(opts, errp, callback, opaque); + fd = unix_connect_saddr(addr->u.q_unix.data, errp, callback, opaque); break; case SOCKET_ADDRESS_KIND_FD: - fd = monitor_get_fd(cur_mon, addr->fd->str, errp); + fd = monitor_get_fd(cur_mon, addr->u.fd.data->str, errp); if (fd >= 0 && callback) { qemu_set_nonblock(fd); callback(fd, NULL, opaque); @@ -960,58 +971,183 @@ int socket_connect(SocketAddress *addr, Error **errp, default: abort(); } - qemu_opts_del(opts); return fd; } int socket_listen(SocketAddress *addr, Error **errp) { - QemuOpts *opts; int fd; - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (addr->kind) { + switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, addr->inet); - fd = inet_listen_opts(opts, 0, errp); + fd = inet_listen_saddr(addr->u.inet.data, 0, false, errp); break; case SOCKET_ADDRESS_KIND_UNIX: - qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); - fd = unix_listen_opts(opts, errp); + fd = unix_listen_saddr(addr->u.q_unix.data, false, errp); break; case SOCKET_ADDRESS_KIND_FD: - fd = monitor_get_fd(cur_mon, addr->fd->str, errp); + fd = monitor_get_fd(cur_mon, addr->u.fd.data->str, errp); break; default: abort(); } - qemu_opts_del(opts); return fd; } int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp) { - QemuOpts *opts; int fd; - opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); - switch (remote->kind) { + switch (remote->type) { case SOCKET_ADDRESS_KIND_INET: - inet_addr_to_opts(opts, remote->inet); - if (local) { - qemu_opt_set(opts, "localaddr", local->inet->host, &error_abort); - qemu_opt_set(opts, "localport", local->inet->port, &error_abort); - } - fd = inet_dgram_opts(opts, errp); + fd = inet_dgram_saddr(remote->u.inet.data, + local ? local->u.inet.data : NULL, errp); break; default: error_setg(errp, "socket type unsupported for datagram"); fd = -1; } - qemu_opts_del(opts); return fd; } + + +static SocketAddress * +socket_sockaddr_to_address_inet(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + char host[NI_MAXHOST]; + char serv[NI_MAXSERV]; + SocketAddress *addr; + InetSocketAddress *inet; + int ret; + + ret = getnameinfo((struct sockaddr *)sa, salen, + host, sizeof(host), + serv, sizeof(serv), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) { + error_setg(errp, "Cannot format numeric socket address: %s", + gai_strerror(ret)); + return NULL; + } + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_INET; + inet = addr->u.inet.data = g_new0(InetSocketAddress, 1); + inet->host = g_strdup(host); + inet->port = g_strdup(serv); + if (sa->ss_family == AF_INET) { + inet->has_ipv4 = inet->ipv4 = true; + } else { + inet->has_ipv6 = inet->ipv6 = true; + } + + return addr; +} + + +#ifndef WIN32 +static SocketAddress * +socket_sockaddr_to_address_unix(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + SocketAddress *addr; + struct sockaddr_un *su = (struct sockaddr_un *)sa; + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix.data = g_new0(UnixSocketAddress, 1); + if (su->sun_path[0]) { + addr->u.q_unix.data->path = g_strndup(su->sun_path, + sizeof(su->sun_path)); + } + + return addr; +} +#endif /* WIN32 */ + +SocketAddress * +socket_sockaddr_to_address(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + switch (sa->ss_family) { + case AF_INET: + case AF_INET6: + return socket_sockaddr_to_address_inet(sa, salen, errp); + +#ifndef WIN32 + case AF_UNIX: + return socket_sockaddr_to_address_unix(sa, salen, errp); +#endif /* WIN32 */ + + default: + error_setg(errp, "socket family %d unsupported", + sa->ss_family); + return NULL; + } + return 0; +} + + +SocketAddress *socket_local_address(int fd, Error **errp) +{ + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) { + error_setg_errno(errp, errno, "%s", + "Unable to query local socket address"); + return NULL; + } + + return socket_sockaddr_to_address(&ss, sslen, errp); +} + + +SocketAddress *socket_remote_address(int fd, Error **errp) +{ + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) { + error_setg_errno(errp, errno, "%s", + "Unable to query remote socket address"); + return NULL; + } + + return socket_sockaddr_to_address(&ss, sslen, errp); +} + + +void qapi_copy_SocketAddress(SocketAddress **p_dest, + SocketAddress *src) +{ + QmpOutputVisitor *qov; + QmpInputVisitor *qiv; + Visitor *ov, *iv; + QObject *obj; + + *p_dest = NULL; + + qov = qmp_output_visitor_new(); + ov = qmp_output_get_visitor(qov); + visit_type_SocketAddress(ov, NULL, &src, &error_abort); + obj = qmp_output_get_qobject(qov); + qmp_output_visitor_cleanup(qov); + if (!obj) { + return; + } + + qiv = qmp_input_visitor_new(obj); + iv = qmp_input_get_visitor(qiv); + visit_type_SocketAddress(iv, NULL, p_dest, &error_abort); + qmp_input_visitor_cleanup(qiv); + qobject_decref(obj); +} diff --git a/qemu/util/qemu-thread-posix.c b/qemu/util/qemu-thread-posix.c index ba67cec62..74a3023f3 100644 --- a/qemu/util/qemu-thread-posix.c +++ b/qemu/util/qemu-thread-posix.c @@ -10,16 +10,7 @@ * See the COPYING file in the top-level directory. * */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "qemu/osdep.h" #ifdef __linux__ #include #include @@ -298,7 +289,16 @@ static inline void futex_wake(QemuEvent *ev, int n) static inline void futex_wait(QemuEvent *ev, unsigned val) { - futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0); + while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) { + switch (errno) { + case EWOULDBLOCK: + return; + case EINTR: + break; /* get out of switch and retry */ + default: + abort(); + } + } } #else static inline void futex_wake(QemuEvent *ev, int n) @@ -389,7 +389,7 @@ void qemu_event_wait(QemuEvent *ev) /* * Leave the event reset and tell qemu_event_set that there * are waiters. No need to retry, because there cannot be - * a concurent busy->free transition. After the CAS, the + * a concurrent busy->free transition. After the CAS, the * event will be either set or busy. */ if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { diff --git a/qemu/util/qemu-thread-win32.c b/qemu/util/qemu-thread-win32.c index 406b52f91..98a5ddff8 100644 --- a/qemu/util/qemu-thread-win32.c +++ b/qemu/util/qemu-thread-win32.c @@ -10,12 +10,11 @@ * See the COPYING file in the top-level directory. * */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/thread.h" #include "qemu/notify.h" #include -#include -#include static bool name_threads; @@ -238,10 +237,34 @@ void qemu_sem_wait(QemuSemaphore *sem) } } +/* Wrap a Win32 manual-reset event with a fast userspace path. The idea + * is to reset the Win32 event lazily, as part of a test-reset-test-wait + * sequence. Such a sequence is, indeed, how QemuEvents are used by + * RCU and other subsystems! + * + * Valid transitions: + * - free->set, when setting the event + * - busy->set, when setting the event, followed by futex_wake + * - set->free, when resetting the event + * - free->busy, when waiting + * + * set->busy does not happen (it can be observed from the outside but + * it really is set->free->busy). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy (and is faster than cmpxchg). + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + void qemu_event_init(QemuEvent *ev, bool init) { /* Manual reset. */ - ev->event = CreateEvent(NULL, TRUE, init, NULL); + ev->event = CreateEvent(NULL, TRUE, TRUE, NULL); + ev->value = (init ? EV_SET : EV_FREE); } void qemu_event_destroy(QemuEvent *ev) @@ -251,17 +274,51 @@ void qemu_event_destroy(QemuEvent *ev) void qemu_event_set(QemuEvent *ev) { - SetEvent(ev->event); + if (atomic_mb_read(&ev->value) != EV_SET) { + if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } + } } void qemu_event_reset(QemuEvent *ev) { - ResetEvent(ev->event); + if (atomic_mb_read(&ev->value) == EV_SET) { + /* If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + atomic_or(&ev->value, EV_FREE); + } } void qemu_event_wait(QemuEvent *ev) { - WaitForSingleObject(ev->event, INFINITE); + unsigned value; + + value = atomic_mb_read(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* qemu_event_set is not yet going to call SetEvent, but we are + * going to do another check for EV_SET below when setting EV_BUSY. + * At that point it is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + + /* Tell qemu_event_set that there are waiters. No need to retry + * because there cannot be a concurent busy->free transition. + * After the CAS, the event will be either set or busy. + */ + if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + value = EV_SET; + } else { + value = EV_BUSY; + } + } + if (value == EV_BUSY) { + WaitForSingleObject(ev->event, INFINITE); + } + } } struct QemuThreadData { diff --git a/qemu/util/qemu-timer-common.c b/qemu/util/qemu-timer-common.c index 95e0847c7..06d084d36 100644 --- a/qemu/util/qemu-timer-common.c +++ b/qemu/util/qemu-timer-common.c @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "qemu/timer.h" /***********************************************************/ diff --git a/qemu/util/rcu.c b/qemu/util/rcu.c index cdcad678b..bceb3e472 100644 --- a/qemu/util/rcu.c +++ b/qemu/util/rcu.c @@ -26,12 +26,8 @@ * IBM's contributions to this file may be relicensed under LGPLv2 or later. */ +#include "qemu/osdep.h" #include "qemu-common.h" -#include -#include -#include -#include -#include #include "qemu/rcu.h" #include "qemu/atomic.h" #include "qemu/thread.h" @@ -47,7 +43,8 @@ unsigned long rcu_gp_ctr = RCU_GP_LOCKED; QemuEvent rcu_gp_event; -static QemuMutex rcu_gp_lock; +static QemuMutex rcu_registry_lock; +static QemuMutex rcu_sync_lock; /* * Check whether a quiescent state was crossed between the beginning of @@ -66,7 +63,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr) */ __thread struct rcu_reader_data rcu_reader; -/* Protected by rcu_gp_lock. */ +/* Protected by rcu_registry_lock. */ typedef QLIST_HEAD(, rcu_reader_data) ThreadList; static ThreadList registry = QLIST_HEAD_INITIALIZER(registry); @@ -114,10 +111,26 @@ static void wait_for_readers(void) break; } - /* Wait for one thread to report a quiescent state and - * try again. + /* Wait for one thread to report a quiescent state and try again. + * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't + * wait too much time. + * + * rcu_register_thread() may add nodes to ®istry; it will not + * wake up synchronize_rcu, but that is okay because at least another + * thread must exit its RCU read-side critical section before + * synchronize_rcu is done. The next iteration of the loop will + * move the new thread's rcu_reader from ®istry to &qsreaders, + * because rcu_gp_ongoing() will return false. + * + * rcu_unregister_thread() may remove nodes from &qsreaders instead + * of ®istry if it runs during qemu_event_wait. That's okay; + * the node then will not be added back to ®istry by QLIST_SWAP + * below. The invariant is that the node is part of one list when + * rcu_registry_lock is released. */ + qemu_mutex_unlock(&rcu_registry_lock); qemu_event_wait(&rcu_gp_event); + qemu_mutex_lock(&rcu_registry_lock); } /* put back the reader list in the registry */ @@ -126,7 +139,8 @@ static void wait_for_readers(void) void synchronize_rcu(void) { - qemu_mutex_lock(&rcu_gp_lock); + qemu_mutex_lock(&rcu_sync_lock); + qemu_mutex_lock(&rcu_registry_lock); if (!QLIST_EMPTY(®istry)) { /* In either case, the atomic_mb_set below blocks stores that free @@ -149,7 +163,8 @@ void synchronize_rcu(void) wait_for_readers(); } - qemu_mutex_unlock(&rcu_gp_lock); + qemu_mutex_unlock(&rcu_registry_lock); + qemu_mutex_unlock(&rcu_sync_lock); } @@ -273,23 +288,24 @@ void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node)) void rcu_register_thread(void) { assert(rcu_reader.ctr == 0); - qemu_mutex_lock(&rcu_gp_lock); + qemu_mutex_lock(&rcu_registry_lock); QLIST_INSERT_HEAD(®istry, &rcu_reader, node); - qemu_mutex_unlock(&rcu_gp_lock); + qemu_mutex_unlock(&rcu_registry_lock); } void rcu_unregister_thread(void) { - qemu_mutex_lock(&rcu_gp_lock); + qemu_mutex_lock(&rcu_registry_lock); QLIST_REMOVE(&rcu_reader, node); - qemu_mutex_unlock(&rcu_gp_lock); + qemu_mutex_unlock(&rcu_registry_lock); } static void rcu_init_complete(void) { QemuThread thread; - qemu_mutex_init(&rcu_gp_lock); + qemu_mutex_init(&rcu_registry_lock); + qemu_mutex_init(&rcu_sync_lock); qemu_event_init(&rcu_gp_event, true); qemu_event_init(&rcu_call_ready_event, false); @@ -306,12 +322,14 @@ static void rcu_init_complete(void) #ifdef CONFIG_POSIX static void rcu_init_lock(void) { - qemu_mutex_lock(&rcu_gp_lock); + qemu_mutex_lock(&rcu_sync_lock); + qemu_mutex_lock(&rcu_registry_lock); } static void rcu_init_unlock(void) { - qemu_mutex_unlock(&rcu_gp_lock); + qemu_mutex_unlock(&rcu_registry_lock); + qemu_mutex_unlock(&rcu_sync_lock); } #endif diff --git a/qemu/util/readline.c b/qemu/util/readline.c index cc1302ac0..bbdee790b 100644 --- a/qemu/util/readline.c +++ b/qemu/util/readline.c @@ -22,8 +22,10 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/readline.h" +#include "qemu/cutils.h" #define IS_NORM 0 #define IS_ESC 1 diff --git a/qemu/util/rfifolock.c b/qemu/util/rfifolock.c index afbf7488d..c22f5feee 100644 --- a/qemu/util/rfifolock.c +++ b/qemu/util/rfifolock.c @@ -11,7 +11,7 @@ * */ -#include +#include "qemu/osdep.h" #include "qemu/rfifolock.h" void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) diff --git a/qemu/util/throttle.c b/qemu/util/throttle.c index 706c13111..71246b234 100644 --- a/qemu/util/throttle.c +++ b/qemu/util/throttle.c @@ -22,6 +22,8 @@ * along with this program; if not, see . */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/throttle.h" #include "qemu/timer.h" #include "block/aio.h" @@ -40,6 +42,14 @@ void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) /* make the bucket leak */ bkt->level = MAX(bkt->level - leak, 0); + + /* if we allow bursts for more than one second we also need to + * keep track of bkt->burst_level so the bkt->max goal per second + * is attained */ + if (bkt->burst_length > 1) { + leak = (bkt->max * (double) delta_ns) / NANOSECONDS_PER_SECOND; + bkt->burst_level = MAX(bkt->burst_level - leak, 0); + } } /* Calculate the time delta since last leak and make proportionals leaks @@ -90,13 +100,24 @@ int64_t throttle_compute_wait(LeakyBucket *bkt) return 0; } - extra = bkt->level - bkt->max; + /* If the bucket is full then we have to wait */ + extra = bkt->level - bkt->max * bkt->burst_length; + if (extra > 0) { + return throttle_do_compute_wait(bkt->avg, extra); + } - if (extra <= 0) { - return 0; + /* If the bucket is not full yet we have to make sure that we + * fulfill the goal of bkt->max units per second. */ + if (bkt->burst_length > 1) { + /* We use 1/10 of the max value to smooth the throttling. + * See throttle_fix_bucket() for more details. */ + extra = bkt->burst_level - bkt->max / 10; + if (extra > 0) { + return throttle_do_compute_wait(bkt->max, extra); + } } - return throttle_do_compute_wait(bkt->avg, extra); + return 0; } /* This function compute the time that must be waited while this IO @@ -136,10 +157,10 @@ static int64_t throttle_compute_wait_for(ThrottleState *ts, * @next_timestamp: the resulting timer * @ret: true if a timer must be set */ -bool throttle_compute_timer(ThrottleState *ts, - bool is_write, - int64_t now, - int64_t *next_timestamp) +static bool throttle_compute_timer(ThrottleState *ts, + bool is_write, + int64_t now, + int64_t *next_timestamp) { int64_t wait; @@ -170,10 +191,24 @@ void throttle_timers_attach_aio_context(ThrottleTimers *tt, tt->write_timer_cb, tt->timer_opaque); } +/* + * Initialize the ThrottleConfig structure to a valid state + * @cfg: the config to initialize + */ +void throttle_config_init(ThrottleConfig *cfg) +{ + unsigned i; + memset(cfg, 0, sizeof(*cfg)); + for (i = 0; i < BUCKETS_COUNT; i++) { + cfg->buckets[i].burst_length = 1; + } +} + /* To be called first on the ThrottleState */ void throttle_init(ThrottleState *ts) { memset(ts, 0, sizeof(ThrottleState)); + throttle_config_init(&ts->cfg); } /* To be called first on the ThrottleTimers */ @@ -247,13 +282,14 @@ bool throttle_enabled(ThrottleConfig *cfg) return false; } -/* return true if any two throttling parameters conflicts - * +/* check if a throttling configuration is valid * @cfg: the throttling configuration to inspect - * @ret: true if any conflict detected else false + * @ret: true if valid else false + * @errp: error object */ -bool throttle_conflicting(ThrottleConfig *cfg) +bool throttle_is_valid(ThrottleConfig *cfg, Error **errp) { + int i; bool bps_flag, ops_flag; bool bps_max_flag, ops_max_flag; @@ -273,31 +309,40 @@ bool throttle_conflicting(ThrottleConfig *cfg) (cfg->buckets[THROTTLE_OPS_READ].max || cfg->buckets[THROTTLE_OPS_WRITE].max); - return bps_flag || ops_flag || bps_max_flag || ops_max_flag; -} - -/* check if a throttling configuration is valid - * @cfg: the throttling configuration to inspect - * @ret: true if valid else false - */ -bool throttle_is_valid(ThrottleConfig *cfg) -{ - bool invalid = false; - int i; + if (bps_flag || ops_flag || bps_max_flag || ops_max_flag) { + error_setg(errp, "bps/iops/max total values and read/write values" + " cannot be used at the same time"); + return false; + } for (i = 0; i < BUCKETS_COUNT; i++) { - if (cfg->buckets[i].avg < 0) { - invalid = true; + if (cfg->buckets[i].avg < 0 || + cfg->buckets[i].max < 0 || + cfg->buckets[i].avg > THROTTLE_VALUE_MAX || + cfg->buckets[i].max > THROTTLE_VALUE_MAX) { + error_setg(errp, "bps/iops/max values must be within [0, %lld]", + THROTTLE_VALUE_MAX); + return false; } - } - for (i = 0; i < BUCKETS_COUNT; i++) { - if (cfg->buckets[i].max < 0) { - invalid = true; + if (!cfg->buckets[i].burst_length) { + error_setg(errp, "the burst length cannot be 0"); + return false; + } + + if (cfg->buckets[i].burst_length > 1 && !cfg->buckets[i].max) { + error_setg(errp, "burst length set without burst rate"); + return false; + } + + if (cfg->buckets[i].max && !cfg->buckets[i].avg) { + error_setg(errp, "bps_max/iops_max require corresponding" + " bps/iops values"); + return false; } } - return !invalid; + return true; } /* fix bucket parameters */ @@ -306,7 +351,7 @@ static void throttle_fix_bucket(LeakyBucket *bkt) double min; /* zero bucket level */ - bkt->level = 0; + bkt->level = bkt->burst_level = 0; /* The following is done to cope with the Linux CFQ block scheduler * which regroup reads and writes by block of 100ms in the guest. @@ -409,22 +454,36 @@ bool throttle_schedule_timer(ThrottleState *ts, */ void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) { + const BucketType bucket_types_size[2][2] = { + { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ }, + { THROTTLE_BPS_TOTAL, THROTTLE_BPS_WRITE } + }; + const BucketType bucket_types_units[2][2] = { + { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ }, + { THROTTLE_OPS_TOTAL, THROTTLE_OPS_WRITE } + }; double units = 1.0; + unsigned i; /* if cfg.op_size is defined and smaller than size we compute unit count */ if (ts->cfg.op_size && size > ts->cfg.op_size) { units = (double) size / ts->cfg.op_size; } - ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; - ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; + for (i = 0; i < 2; i++) { + LeakyBucket *bkt; + + bkt = &ts->cfg.buckets[bucket_types_size[is_write][i]]; + bkt->level += size; + if (bkt->burst_length > 1) { + bkt->burst_level += size; + } - if (is_write) { - ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; - ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; - } else { - ts->cfg.buckets[THROTTLE_BPS_READ].level += size; - ts->cfg.buckets[THROTTLE_OPS_READ].level += units; + bkt = &ts->cfg.buckets[bucket_types_units[is_write][i]]; + bkt->level += units; + if (bkt->burst_length > 1) { + bkt->burst_level += units; + } } } diff --git a/qemu/util/timed-average.c b/qemu/util/timed-average.c new file mode 100644 index 000000000..2eef9cbb1 --- /dev/null +++ b/qemu/util/timed-average.c @@ -0,0 +1,231 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet + * Alberto Garcia + * + * This program is free sofware: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Sofware Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +#include "qemu/timed-average.h" + +/* This module computes an average of a set of values within a time + * window. + * + * Algorithm: + * + * - Create two windows with a certain expiration period, and + * offsetted by period / 2. + * - Each time you want to account a new value, do it in both windows. + * - The minimum / maximum / average values are always returned from + * the oldest window. + * + * Example: + * + * t=0 |t=0.5 |t=1 |t=1.5 |t=2 + * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) | + * wnd1: [0,1) | |wnd1: [1,2) | | + * + * Values are returned from: + * + * wnd0---------|wnd1------------|wnd0---------|wnd1-------------| + */ + +/* Update the expiration of a time window + * + * @w: the window used + * @now: the current time in nanoseconds + * @period: the expiration period in nanoseconds + */ +static void update_expiration(TimedAverageWindow *w, int64_t now, + int64_t period) +{ + /* time elapsed since the last theoretical expiration */ + int64_t elapsed = (now - w->expiration) % period; + /* time remaininging until the next expiration */ + int64_t remaining = period - elapsed; + /* compute expiration */ + w->expiration = now + remaining; +} + +/* Reset a window + * + * @w: the window to reset + */ +static void window_reset(TimedAverageWindow *w) +{ + w->min = UINT64_MAX; + w->max = 0; + w->sum = 0; + w->count = 0; +} + +/* Get the current window (that is, the one with the earliest + * expiration time). + * + * @ta: the TimedAverage structure + * @ret: a pointer to the current window + */ +static TimedAverageWindow *current_window(TimedAverage *ta) +{ + return &ta->windows[ta->current]; +} + +/* Initialize a TimedAverage structure + * + * @ta: the TimedAverage structure + * @clock_type: the type of clock to use + * @period: the time window period in nanoseconds + */ +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period) +{ + int64_t now = qemu_clock_get_ns(clock_type); + + /* Returned values are from the oldest window, so they belong to + * the interval [ta->period/2,ta->period). By adjusting the + * requested period by 4/3, we guarantee that they're in the + * interval [2/3 period,4/3 period), closer to the requested + * period on average */ + ta->period = (uint64_t) period * 4 / 3; + ta->clock_type = clock_type; + ta->current = 0; + + window_reset(&ta->windows[0]); + window_reset(&ta->windows[1]); + + /* Both windows are offsetted by half a period */ + ta->windows[0].expiration = now + ta->period / 2; + ta->windows[1].expiration = now + ta->period; +} + +/* Check if the time windows have expired, updating their counters and + * expiration time if that's the case. + * + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) within the current + * window will be stored here + */ +static void check_expirations(TimedAverage *ta, uint64_t *elapsed) +{ + int64_t now = qemu_clock_get_ns(ta->clock_type); + int i; + + assert(ta->period != 0); + + /* Check if the windows have expired */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + if (w->expiration <= now) { + window_reset(w); + update_expiration(w, now, ta->period); + } + } + + /* Make ta->current point to the oldest window */ + if (ta->windows[0].expiration < ta->windows[1].expiration) { + ta->current = 0; + } else { + ta->current = 1; + } + + /* Calculate the elapsed time within the current window */ + if (elapsed) { + int64_t remaining = ta->windows[ta->current].expiration - now; + *elapsed = ta->period - remaining; + } +} + +/* Account a value + * + * @ta: the TimedAverage structure + * @value: the value to account + */ +void timed_average_account(TimedAverage *ta, uint64_t value) +{ + int i; + check_expirations(ta, NULL); + + /* Do the accounting in both windows at the same time */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + + w->sum += value; + w->count++; + + if (value < w->min) { + w->min = value; + } + + if (value > w->max) { + w->max = value; + } + } +} + +/* Get the minimum value + * + * @ta: the TimedAverage structure + * @ret: the minimum value + */ +uint64_t timed_average_min(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->min < UINT64_MAX ? w->min : 0; +} + +/* Get the average value + * + * @ta: the TimedAverage structure + * @ret: the average value + */ +uint64_t timed_average_avg(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->count > 0 ? w->sum / w->count : 0; +} + +/* Get the maximum value + * + * @ta: the TimedAverage structure + * @ret: the maximum value + */ +uint64_t timed_average_max(TimedAverage *ta) +{ + check_expirations(ta, NULL); + return current_window(ta)->max; +} + +/* Get the sum of all accounted values + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here + * @ret: the sum of all accounted values + */ +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed) +{ + TimedAverageWindow *w; + check_expirations(ta, elapsed); + w = current_window(ta); + return w->sum; +} diff --git a/qemu/util/unicode.c b/qemu/util/unicode.c index d1c865885..a812a3517 100644 --- a/qemu/util/unicode.c +++ b/qemu/util/unicode.c @@ -10,7 +10,8 @@ * later. See the COPYING file in the top-level directory. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qemu/unicode.h" /** * mod_utf8_codepoint: diff --git a/qemu/util/uri.c b/qemu/util/uri.c index 550b98458..d109d6c01 100644 --- a/qemu/util/uri.c +++ b/qemu/util/uri.c @@ -51,9 +51,8 @@ * */ +#include "qemu/osdep.h" #include -#include -#include #include "qemu/uri.h" -- cgit 1.2.3-korg