summaryrefslogtreecommitdiffstats
path: root/qemu/include/block
diff options
context:
space:
mode:
authorYang Zhang <yang.z.zhang@intel.com>2015-08-28 09:58:54 +0800
committerYang Zhang <yang.z.zhang@intel.com>2015-09-01 12:44:00 +0800
commite44e3482bdb4d0ebde2d8b41830ac2cdb07948fb (patch)
tree66b09f592c55df2878107a468a91d21506104d3f /qemu/include/block
parent9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (diff)
Add qemu 2.4.0
Change-Id: Ic99cbad4b61f8b127b7dc74d04576c0bcbaaf4f5 Signed-off-by: Yang Zhang <yang.z.zhang@intel.com>
Diffstat (limited to 'qemu/include/block')
-rw-r--r--qemu/include/block/accounting.h60
-rw-r--r--qemu/include/block/aio.h376
-rw-r--r--qemu/include/block/block.h619
-rw-r--r--qemu/include/block/block_int.h667
-rw-r--r--qemu/include/block/blockjob.h359
-rw-r--r--qemu/include/block/coroutine.h219
-rw-r--r--qemu/include/block/coroutine_int.h54
-rw-r--r--qemu/include/block/nbd.h106
-rw-r--r--qemu/include/block/qapi.h46
-rw-r--r--qemu/include/block/scsi.h309
-rw-r--r--qemu/include/block/snapshot.h78
-rw-r--r--qemu/include/block/thread-pool.h37
-rw-r--r--qemu/include/block/throttle-groups.h46
-rw-r--r--qemu/include/block/write-threshold.h64
14 files changed, 3040 insertions, 0 deletions
diff --git a/qemu/include/block/accounting.h b/qemu/include/block/accounting.h
new file mode 100644
index 000000000..4c406cff7
--- /dev/null
+++ b/qemu/include/block/accounting.h
@@ -0,0 +1,60 @@
+/*
+ * QEMU System Emulator block accounting
+ *
+ * Copyright (c) 2011 Christoph Hellwig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_ACCOUNTING_H
+#define BLOCK_ACCOUNTING_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+
+enum BlockAcctType {
+ BLOCK_ACCT_READ,
+ BLOCK_ACCT_WRITE,
+ BLOCK_ACCT_FLUSH,
+ BLOCK_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctStats {
+ uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
+ uint64_t nr_ops[BLOCK_MAX_IOTYPE];
+ uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
+ uint64_t merged[BLOCK_MAX_IOTYPE];
+ uint64_t wr_highest_sector;
+} BlockAcctStats;
+
+typedef struct BlockAcctCookie {
+ int64_t bytes;
+ int64_t start_time_ns;
+ enum BlockAcctType type;
+} BlockAcctCookie;
+
+void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
+ int64_t bytes, enum BlockAcctType type);
+void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
+void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
+ unsigned int nb_sectors);
+void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
+ int num_requests);
+
+#endif
diff --git a/qemu/include/block/aio.h b/qemu/include/block/aio.h
new file mode 100644
index 000000000..400b1b002
--- /dev/null
+++ b/qemu/include/block/aio.h
@@ -0,0 +1,376 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/event_notifier.h"
+#include "qemu/thread.h"
+#include "qemu/rfifolock.h"
+#include "qemu/timer.h"
+
+typedef struct BlockAIOCB BlockAIOCB;
+typedef void BlockCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOCBInfo {
+ void (*cancel_async)(BlockAIOCB *acb);
+ AioContext *(*get_aio_context)(BlockAIOCB *acb);
+ size_t aiocb_size;
+} AIOCBInfo;
+
+struct BlockAIOCB {
+ const AIOCBInfo *aiocb_info;
+ BlockDriverState *bs;
+ BlockCompletionFunc *cb;
+ void *opaque;
+ int refcnt;
+};
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+void qemu_aio_unref(void *p);
+void qemu_aio_ref(void *p);
+
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+struct AioContext {
+ GSource source;
+
+ /* Protects all fields from multi-threaded access */
+ RFifoLock lock;
+
+ /* The list of registered AIO handlers */
+ QLIST_HEAD(, AioHandler) aio_handlers;
+
+ /* This is a simple lock used to protect the aio_handlers list.
+ * Specifically, it's used to ensure that no callbacks are removed while
+ * we're walking and dispatching callbacks.
+ */
+ int walking_handlers;
+
+ /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
+ * accessed with atomic primitives. If this field is 0, everything
+ * (file descriptors, bottom halves, timers) will be re-evaluated
+ * before the next blocking poll(), thus the event_notifier_set call
+ * can be skipped. If it is non-zero, you may need to wake up a
+ * concurrent aio_poll or the glib main event loop, making
+ * event_notifier_set necessary.
+ *
+ * Bit 0 is reserved for GSource usage of the AioContext, and is 1
+ * between a call to aio_ctx_check and the next call to aio_ctx_dispatch.
+ * Bits 1-31 simply count the number of active calls to aio_poll
+ * that are in the prepare or poll phase.
+ *
+ * The GSource and aio_poll must use a different mechanism because
+ * there is no certainty that a call to GSource's prepare callback
+ * (via g_main_context_prepare) is indeed followed by check and
+ * dispatch. It's not clear whether this would be a bug, but let's
+ * play safe and allow it---it will just cause extra calls to
+ * event_notifier_set until the next call to dispatch.
+ *
+ * Instead, the aio_poll calls include both the prepare and the
+ * dispatch phase, hence a simple counter is enough for them.
+ */
+ uint32_t notify_me;
+
+ /* lock to protect between bh's adders and deleter */
+ QemuMutex bh_lock;
+
+ /* Anchor of the list of Bottom Halves belonging to the context */
+ struct QEMUBH *first_bh;
+
+ /* A simple lock used to protect the first_bh list, and ensure that
+ * no callbacks are removed while we're walking and dispatching callbacks.
+ */
+ int walking_bh;
+
+ /* Used by aio_notify.
+ *
+ * "notified" is used to avoid expensive event_notifier_test_and_clear
+ * calls. When it is clear, the EventNotifier is clear, or one thread
+ * is going to clear "notified" before processing more events. False
+ * positives are possible, i.e. "notified" could be set even though the
+ * EventNotifier is clear.
+ *
+ * Note that event_notifier_set *cannot* be optimized the same way. For
+ * more information on the problem that would result, see "#ifdef BUG2"
+ * in the docs/aio_notify_accept.promela formal model.
+ */
+ bool notified;
+ EventNotifier notifier;
+
+ /* Scheduling this BH forces the event loop it iterate */
+ QEMUBH *notify_dummy_bh;
+
+ /* Thread pool for performing work and receiving completion callbacks */
+ struct ThreadPool *thread_pool;
+
+ /* TimerLists for calling timers - one per clock type */
+ QEMUTimerListGroup tlg;
+};
+
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * AioContext provide a mini event-loop that can be waited on synchronously.
+ * They also provide bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(Error **errp);
+
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
+/* Take ownership of the AioContext. If the AioContext will be shared between
+ * threads, and a thread does not want to be interrupted, it will have to
+ * take ownership around calls to aio_poll(). Otherwise, aio_poll()
+ * automatically takes care of calling aio_context_acquire and
+ * aio_context_release.
+ *
+ * Access to timers and BHs from a thread that has not acquired AioContext
+ * is possible. Access to callbacks for now must be done while the AioContext
+ * is owned by the thread (FIXME).
+ */
+void aio_context_acquire(AioContext *ctx);
+
+/* Relinquish ownership of the AioContext. */
+void aio_context_release(AioContext *ctx);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop. Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
+/**
+ * aio_notify_accept: Acknowledge receiving an aio_notify.
+ *
+ * aio_notify() uses an EventNotifier in order to wake up a sleeping
+ * aio_poll() or g_main_context_iteration(). Calls to aio_notify() are
+ * usually rare, but the AioContext has to clear the EventNotifier on
+ * every aio_poll() or g_main_context_iteration() in order to avoid
+ * busy waiting. This event_notifier_test_and_clear() cannot be done
+ * using the usual aio_context_set_event_notifier(), because it must
+ * be done before processing all events (file descriptors, bottom halves,
+ * timers).
+ *
+ * aio_notify_accept() is an optimized event_notifier_test_and_clear()
+ * that is specific to an AioContext's notifier; it is used internally
+ * to clear the EventNotifier only if aio_notify() had been called.
+ */
+void aio_notify_accept(AioContext *ctx);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ * And notice that multiple occurrences of aio_bh_poll cannot
+ * be called concurrently
+ */
+int aio_bh_poll(AioContext *ctx);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet. While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex. This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new. It also implies canceling the bottom half if it was
+ * scheduled.
+ * This func is async. The bottom half will do the delete action at the finial
+ * end.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, before g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_prepare(AioContext *ctx);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext, after g_poll is invoked.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
+/* Dispatch any pending callbacks from the GSource attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_dispatch(AioContext *ctx);
+
+/* Progress in completing AIO work to occur. This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
+ *
+ * Return whether any progress was made by executing AIO or bottom half
+ * handlers. If @blocking == true, this should always be true except
+ * if someone called aio_notify.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking. If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will
+ * be invoked when using aio_poll().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ void *opaque);
+
+/* Register an event notifier and associated callbacks. Behaves very similarly
+ * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using aio_poll().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void aio_set_event_notifier(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_read);
+
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
+/* Return the ThreadPool bound to this AioContext */
+struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+
+/**
+ * aio_timer_new:
+ * @ctx: the aio context
+ * @type: the clock type
+ * @scale: the scale
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Allocate a new timer attached to the context @ctx.
+ * The function is responsible for memory allocation.
+ *
+ * The preferred interface is aio_timer_init. Use that
+ * unless you really need dynamic memory allocation.
+ *
+ * Returns: a pointer to the new timer
+ */
+static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
+ int scale,
+ QEMUTimerCB *cb, void *opaque)
+{
+ return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
+}
+
+/**
+ * aio_timer_init:
+ * @ctx: the aio context
+ * @ts: the timer
+ * @type: the clock type
+ * @scale: the scale
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Initialise a new timer attached to the context @ctx.
+ * The caller is responsible for memory allocation.
+ */
+static inline void aio_timer_init(AioContext *ctx,
+ QEMUTimer *ts, QEMUClockType type,
+ int scale,
+ QEMUTimerCB *cb, void *opaque)
+{
+ timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
+}
+
+/**
+ * aio_compute_timeout:
+ * @ctx: the aio context
+ *
+ * Compute the timeout that a blocking aio_poll should use.
+ */
+int64_t aio_compute_timeout(AioContext *ctx);
+
+#endif
diff --git a/qemu/include/block/block.h b/qemu/include/block/block.h
new file mode 100644
index 000000000..37916f720
--- /dev/null
+++ b/qemu/include/block/block.h
@@ -0,0 +1,619 @@
+#ifndef BLOCK_H
+#define BLOCK_H
+
+#include "block/aio.h"
+#include "qemu-common.h"
+#include "qemu/option.h"
+#include "block/coroutine.h"
+#include "block/accounting.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi-types.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildRole BdrvChildRole;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+ /*
+ * True if unallocated blocks read back as zeroes. This is equivalent
+ * to the the LBPRZ flag in the SCSI logical block provisioning page.
+ */
+ bool unallocated_blocks_are_zero;
+ /*
+ * True if the driver can optimize writing zeroes by unmapping
+ * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
+ * with the difference that in qemu a discard is allowed to silently
+ * fail. Therefore we have to use bdrv_write_zeroes with the
+ * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
+ * After this call the driver has to guarantee that the contents read
+ * back as zero. It is additionally required that the block device is
+ * opened with BDRV_O_UNMAP flag for this to work.
+ */
+ bool can_write_zeroes_with_unmap;
+ /*
+ * True if this block driver only supports compressed writes
+ */
+ bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+ /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
+ * is allowed to optimize a write zeroes request by unmapping (discarding)
+ * blocks if it is guaranteed that the result will read back as
+ * zeroes. The flag is only passed to the driver if the block device is
+ * opened with BDRV_O_UNMAP.
+ */
+ BDRV_REQ_MAY_UNMAP = 0x4,
+} BdrvRequestFlags;
+
+typedef struct BlockSizes {
+ uint32_t phys;
+ uint32_t log;
+} BlockSizes;
+
+typedef struct HDGeometry {
+ uint32_t heads;
+ uint32_t sectors;
+ uint32_t cylinders;
+} HDGeometry;
+
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
+ select an appropriate protocol driver,
+ ignoring the format layer */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+
+
+/* Option names of options parsed by the block layer */
+
+#define BDRV_OPT_CACHE_WB "cache.writeback"
+#define BDRV_OPT_CACHE_DIRECT "cache.direct"
+#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
+
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+
+#define BDRV_REQUEST_MAX_SECTORS MIN(SIZE_MAX >> BDRV_SECTOR_BITS, \
+ INT_MAX >> BDRV_SECTOR_BITS)
+
+/*
+ * Allocation status flags
+ * BDRV_BLOCK_DATA: data is read from bs->file or another file
+ * BDRV_BLOCK_ZERO: sectors read as zero
+ * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data
+ * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
+ * layer (as opposed to the backing file)
+ * BDRV_BLOCK_RAW: used internally to indicate that the request
+ * was answered by the raw driver and that one
+ * should look in bs->file directly.
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in
+ * bs->file where sector data can be read from as raw data.
+ *
+ * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present.
+ *
+ * DATA ZERO OFFSET_VALID
+ * t t t sectors read as zero, bs->file is zero at offset
+ * t f t sectors read as valid from bs->file at offset
+ * f t t sectors preallocated, read as zero, bs->file not
+ * necessarily zero at offset
+ * f f t sectors preallocated but read from backing_hd,
+ * bs->file contains garbage at offset
+ * t t f sectors preallocated, read as zero, unknown offset
+ * t f f sectors read from unknown file or offset
+ * f t f not allocated or unknown offset, read as zero
+ * f f f not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA 0x01
+#define BDRV_BLOCK_ZERO 0x02
+#define BDRV_BLOCK_OFFSET_VALID 0x04
+#define BDRV_BLOCK_RAW 0x08
+#define BDRV_BLOCK_ALLOCATED 0x10
+#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+ BLOCK_OP_TYPE_BACKUP_SOURCE,
+ BLOCK_OP_TYPE_BACKUP_TARGET,
+ BLOCK_OP_TYPE_CHANGE,
+ BLOCK_OP_TYPE_COMMIT_SOURCE,
+ BLOCK_OP_TYPE_COMMIT_TARGET,
+ BLOCK_OP_TYPE_DATAPLANE,
+ BLOCK_OP_TYPE_DRIVE_DEL,
+ BLOCK_OP_TYPE_EJECT,
+ BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+ BLOCK_OP_TYPE_MIRROR,
+ BLOCK_OP_TYPE_RESIZE,
+ BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_MAX,
+} BlockOpType;
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+void bdrv_iostatus_disable(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
+void bdrv_info_print(Monitor *mon, const QObject *data);
+void bdrv_info(Monitor *mon, QObject **ret_data);
+void bdrv_stats_print(Monitor *mon, const QObject *data);
+void bdrv_info_stats(Monitor *mon, QObject **ret_data);
+
+/* disk I/O throttling */
+void bdrv_io_limits_enable(BlockDriverState *bs, const char *group);
+void bdrv_io_limits_disable(BlockDriverState *bs);
+void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group);
+
+void bdrv_init(void);
+void bdrv_init_with_whitelist(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix,
+ Error **errp);
+BlockDriver *bdrv_find_format(const char *format_name);
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool readonly);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QemuOpts *opts, Error **errp);
+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
+BlockDriverState *bdrv_new_root(void);
+BlockDriverState *bdrv_new(void);
+void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+int bdrv_parse_cache_flags(const char *mode, int *flags);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+int bdrv_open_image(BlockDriverState **pbs, const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent, const BdrvChildRole *child_role,
+ bool allow_none, Error **errp);
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent,
+ const BdrvChildRole *child_role,
+ bool allow_none, Error **errp);
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
+int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
+int bdrv_open(BlockDriverState **pbs, const char *filename,
+ const char *reference, QDict *options, int flags,
+ BlockDriver *drv, Error **errp);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags);
+BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags);
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+int bdrv_get_backing_file_depth(BlockDriverState *bs);
+void bdrv_refresh_filename(BlockDriverState *bs);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_commit_all(void);
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base,
+ const char *backing_file_str);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+ int64_t total_work_size);
+int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb);
+
+/* external snapshots */
+bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
+ BlockDriverState *candidate);
+bool bdrv_is_first_non_filter(BlockDriverState *candidate);
+
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(const char *node_name, Error **errp);
+
+/* async block I/O */
+typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
+ int sector_num);
+BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockAIOCB *acb);
+void bdrv_aio_cancel_async(BlockAIOCB *acb);
+
+typedef struct BlockRequest {
+ /* Fields to be filled by multiwrite caller */
+ int64_t sector;
+ int nb_sectors;
+ int flags;
+ QEMUIOVector *qiov;
+ BlockCompletionFunc *cb;
+ void *opaque;
+
+ /* Filled by multiwrite implementation */
+ int error;
+} BlockRequest;
+
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs);
+
+/* sg packet commands */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
+BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+
+/* Invalidate any cached metadata used by image formats */
+void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+void bdrv_invalidate_cache_all(Error **errp);
+
+/* Ensure contents are flushed to disk. */
+int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain(BlockDriverState *bs);
+void bdrv_drain_all(void);
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+int64_t bdrv_get_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_sg(BlockDriverState *bs);
+int bdrv_enable_write_cache(BlockDriverState *bs);
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+ const char *node_name,
+ Error **errp);
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
+BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next(BlockDriverState *bs);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_key_required(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp);
+int bdrv_query_missing_keys(void);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_node_name(const BlockDriverState *bs);
+const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+void bdrv_get_full_backing_filename(BlockDriverState *bs,
+ char *dest, size_t sz, Error **errp);
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ char *dest, size_t sz,
+ Error **errp);
+int bdrv_is_snapshot(BlockDriverState *bs);
+
+int path_has_protocol(const char *path);
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename);
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet);
+
+/* Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_blockalign0(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+struct HBitmapIter;
+typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+ const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
+typedef enum {
+ BLKDBG_L1_UPDATE,
+
+ BLKDBG_L1_GROW_ALLOC_TABLE,
+ BLKDBG_L1_GROW_WRITE_TABLE,
+ BLKDBG_L1_GROW_ACTIVATE_TABLE,
+
+ BLKDBG_L2_LOAD,
+ BLKDBG_L2_UPDATE,
+ BLKDBG_L2_UPDATE_COMPRESSED,
+ BLKDBG_L2_ALLOC_COW_READ,
+ BLKDBG_L2_ALLOC_WRITE,
+
+ BLKDBG_READ_AIO,
+ BLKDBG_READ_BACKING_AIO,
+ BLKDBG_READ_COMPRESSED,
+
+ BLKDBG_WRITE_AIO,
+ BLKDBG_WRITE_COMPRESSED,
+
+ BLKDBG_VMSTATE_LOAD,
+ BLKDBG_VMSTATE_SAVE,
+
+ BLKDBG_COW_READ,
+ BLKDBG_COW_WRITE,
+
+ BLKDBG_REFTABLE_LOAD,
+ BLKDBG_REFTABLE_GROW,
+ BLKDBG_REFTABLE_UPDATE,
+
+ BLKDBG_REFBLOCK_LOAD,
+ BLKDBG_REFBLOCK_UPDATE,
+ BLKDBG_REFBLOCK_UPDATE_PART,
+ BLKDBG_REFBLOCK_ALLOC,
+ BLKDBG_REFBLOCK_ALLOC_HOOKUP,
+ BLKDBG_REFBLOCK_ALLOC_WRITE,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
+ BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+
+ BLKDBG_CLUSTER_ALLOC,
+ BLKDBG_CLUSTER_ALLOC_BYTES,
+ BLKDBG_CLUSTER_FREE,
+
+ BLKDBG_FLUSH_TO_OS,
+ BLKDBG_FLUSH_TO_DISK,
+
+ BLKDBG_PWRITEV_RMW_HEAD,
+ BLKDBG_PWRITEV_RMW_AFTER_HEAD,
+ BLKDBG_PWRITEV_RMW_TAIL,
+ BLKDBG_PWRITEV_RMW_AFTER_TAIL,
+ BLKDBG_PWRITEV,
+ BLKDBG_PWRITEV_ZERO,
+ BLKDBG_PWRITEV_DONE,
+
+ BLKDBG_EMPTY_IMAGE_PREPARE,
+
+ BLKDBG_EVENT_MAX,
+} BlkDebugEvent;
+
+#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_set_aio_context:
+ *
+ * Changes the #AioContext used for fd handlers, timers, and BHs by this
+ * BlockDriverState and all its children.
+ *
+ * This function must be called with iothread lock held.
+ */
+void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
+
+void bdrv_io_plug(BlockDriverState *bs);
+void bdrv_io_unplug(BlockDriverState *bs);
+void bdrv_flush_io_queue(BlockDriverState *bs);
+
+BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+
+#endif
diff --git a/qemu/include/block/block_int.h b/qemu/include/block/block_int.h
new file mode 100644
index 000000000..14ad4c334
--- /dev/null
+++ b/qemu/include/block/block_int.h
@@ -0,0 +1,667 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
+#include "block/accounting.h"
+#include "block/block.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+#include "qemu/main-loop.h"
+#include "qemu/throttle.h"
+
+#define BLOCK_FLAG_ENCRYPT 1
+#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+#define BLOCK_OPT_REDUNDANCY "redundancy"
+#define BLOCK_OPT_NOCOW "nocow"
+#define BLOCK_OPT_OBJECT_SIZE "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
+
+#define BLOCK_PROBE_BUF_SIZE 512
+
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t offset;
+ unsigned int bytes;
+ bool is_write;
+
+ bool serialising;
+ int64_t overlap_offset;
+ unsigned int overlap_bytes;
+
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+
+ struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+struct BlockDriver {
+ const char *format_name;
+ int instance_size;
+
+ /* set to true if the BlockDriver is a block filter */
+ bool is_filter;
+ /* for snapshots block filter like Quorum can implement the
+ * following recursive callback.
+ * It's purpose is to recurse on the filter children while calling
+ * bdrv_recurse_is_first_non_filter on them.
+ * For a sample implementation look in the future Quorum block filter.
+ */
+ bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
+ BlockDriverState *candidate);
+
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+ int (*bdrv_probe_device)(const char *filename);
+
+ /* Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
+ /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
+ * this field set to true, except ones that are defined only by their
+ * child's bs.
+ * An example of the last type will be the quorum block driver.
+ */
+ bool bdrv_needs_filename;
+
+ /* Set if a driver can support backing files */
+ bool supports_backing;
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+ int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+ int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+ void (*bdrv_close)(BlockDriverState *bs);
+ void (*bdrv_rebind)(BlockDriverState *bs);
+ int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
+ int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+
+ void (*bdrv_refresh_filename)(BlockDriverState *bs);
+
+ /* aio */
+ BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ const char *protocol_name;
+ int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ bool has_variable_length;
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+
+ int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+ ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
+
+ int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos);
+ int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* removable device specific */
+ int (*bdrv_is_inserted)(BlockDriverState *bs);
+ int (*bdrv_media_changed)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
+ BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+
+ /* List of options for creating images, terminated by name == NULL */
+ QemuOptsList *create_opts;
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+
+ /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ /* Remove fd handlers, timers, and other event loop callbacks so the event
+ * loop is no longer in use. Called with no in-flight requests and in
+ * depth-first traversal order with parents before child nodes.
+ */
+ void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+ /* Add fd handlers, timers, and other event loop callbacks so I/O requests
+ * can be processed again. Called with no in-flight requests and in
+ * depth-first traversal order with child nodes before parent nodes.
+ */
+ void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+ AioContext *new_context);
+
+ /* io queue for linux-aio */
+ void (*bdrv_io_plug)(BlockDriverState *bs);
+ void (*bdrv_io_unplug)(BlockDriverState *bs);
+ void (*bdrv_flush_io_queue)(BlockDriverState *bs);
+
+ /**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz and return zero.
+ * On failure, return negative errno.
+ */
+ int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
+ /**
+ * Try to get @bs's geometry (cyls, heads, sectors)
+ * On success, store them in @geo and return 0.
+ * On failure return -errno.
+ * Only drivers that want to override guest geometry implement this
+ * callback; see hd_geometry_guess().
+ */
+ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
+
+ QLIST_ENTRY(BlockDriver) list;
+};
+
+typedef struct BlockLimits {
+ /* maximum number of sectors that can be discarded at once */
+ int max_discard;
+
+ /* optimal alignment for discard requests in sectors */
+ int64_t discard_alignment;
+
+ /* maximum number of sectors that can zeroized at once */
+ int max_write_zeroes;
+
+ /* optimal alignment for write zeroes requests in sectors */
+ int64_t write_zeroes_alignment;
+
+ /* optimal transfer length in sectors */
+ int opt_transfer_length;
+
+ /* maximal transfer length in sectors */
+ int max_transfer_length;
+
+ /* memory alignment so that no bounce buffer is needed */
+ size_t min_mem_alignment;
+
+ /* memory alignment for bounce buffer */
+ size_t opt_mem_alignment;
+} BlockLimits;
+
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
+typedef struct BdrvAioNotifier {
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+ void (*detach_aio_context)(void *opaque);
+
+ void *opaque;
+
+ QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
+struct BdrvChildRole {
+ int (*inherit_flags)(int parent_flags);
+};
+
+extern const BdrvChildRole child_file;
+extern const BdrvChildRole child_format;
+
+struct BdrvChild {
+ BlockDriverState *bs;
+ const BdrvChildRole *role;
+ QLIST_ENTRY(BdrvChild) next;
+};
+
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
+struct BlockDriverState {
+ int64_t total_sectors; /* if we are reading a disk image, give its
+ size in sectors */
+ int read_only; /* if true, the media is read only */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ int encrypted; /* if true, the media is encrypted */
+ int valid_key; /* if true, a valid encryption key has been set */
+ int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
+ bool probed;
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ BlockBackend *blk; /* owning backend, if any */
+
+ AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+ /* long-running tasks intended to always use the same AioContext as this
+ * BDS may register themselves in this list to be notified of changes
+ * regarding this BDS's context */
+ QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+
+ char filename[PATH_MAX];
+ char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
+ this file image */
+ char backing_format[16]; /* if non-zero and backing_file exists */
+
+ QDict *full_open_options;
+ char exact_filename[PATH_MAX];
+
+ BlockDriverState *backing_hd;
+ BdrvChild *backing_child;
+ BlockDriverState *file;
+
+ NotifierList close_notifiers;
+
+ /* Callback before write request is processed */
+ NotifierWithReturnList before_write_notifiers;
+
+ /* number of in-flight serialising requests */
+ unsigned int serialising_in_flight;
+
+ /* I/O throttling */
+ CoQueue throttled_reqs[2];
+ bool io_limits_enabled;
+ /* The following fields are protected by the ThrottleGroup lock.
+ * See the ThrottleGroup documentation for details. */
+ ThrottleState *throttle_state;
+ ThrottleTimers throttle_timers;
+ unsigned pending_reqs[2];
+ QLIST_ENTRY(BlockDriverState) round_robin;
+
+ /* I/O stats (display with "info blockstats"). */
+ BlockAcctStats stats;
+
+ /* I/O Limits */
+ BlockLimits bl;
+
+ /* Whether produces zeros when read beyond eof */
+ bool zero_beyond_eof;
+
+ /* Alignment requirement for offset/length of I/O requests */
+ unsigned int request_alignment;
+
+ /* the block size for which the guest device expects atomicity */
+ int guest_block_size;
+
+ /* do we need to tell the quest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* NOTE: the following infos are only hints for real hardware
+ drivers. They are not used by the block driver */
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+
+ /* the following member gives a name to every node on the bs graph. */
+ char node_name[32];
+ /* element of the list of named nodes building the graph */
+ QTAILQ_ENTRY(BlockDriverState) node_list;
+ /* element of the list of "drives" the guest sees */
+ QTAILQ_ENTRY(BlockDriverState) device_list;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+ int refcnt;
+
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* operation blockers */
+ QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
+
+ /* long-running background operation */
+ BlockJob *job;
+
+ /* The node that this node inherited default options from (and a reopen on
+ * which can affect this node by changing these defaults). This is always a
+ * parent node of this node. */
+ BlockDriverState *inherits_from;
+ QLIST_HEAD(, BdrvChild) children;
+
+ QDict *options;
+ BlockdevDetectZeroesOptions detect_zeroes;
+
+ /* The error object in use for blocking operations on backing_hd */
+ Error *backing_blocker;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+ NotifierWithReturn write_threshold_notifier;
+};
+
+
+/* Essential block drivers which must always be statically linked into qemu, and
+ * which therefore can be accessed without using bdrv_find_format() */
+extern BlockDriver bdrv_file;
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_qcow2;
+
+/**
+ * bdrv_setup_io_funcs:
+ *
+ * Prepare a #BlockDriver for I/O request processing by populating
+ * unimplemented coroutine and AIO interfaces with generic wrapper functions
+ * that fall back to implemented interfaces.
+ */
+void bdrv_setup_io_funcs(BlockDriver *bdrv);
+
+int get_tmp_filename(char *filename, int size);
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+ const char *filename);
+
+void bdrv_set_io_limits(BlockDriverState *bs,
+ ThrottleConfig *cfg);
+
+
+/**
+ * bdrv_add_before_write_notifier:
+ *
+ * Register a callback that is invoked before write requests are processed but
+ * after any throttling or waiting for overlapping requests.
+ */
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier);
+
+/**
+ * bdrv_detach_aio_context:
+ *
+ * May be called from .bdrv_detach_aio_context() to detach children from the
+ * current #AioContext. This is only needed by block drivers that manage their
+ * own children. Both ->file and ->backing_hd are automatically handled and
+ * block drivers should not call this function on them explicitly.
+ */
+void bdrv_detach_aio_context(BlockDriverState *bs);
+
+/**
+ * bdrv_attach_aio_context:
+ *
+ * May be called from .bdrv_attach_aio_context() to attach children to the new
+ * #AioContext. This is only needed by block drivers that manage their own
+ * children. Both ->file and ->backing_hd are automatically handled and block
+ * drivers should not call this function on them explicitly.
+ */
+void bdrv_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context);
+
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe of change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*aio_context_attached)(AioContext *,
+ void *),
+ void (*aio_context_detached)(void *),
+ void *opaque);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+void stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
+ BlockCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @backing_file_str: String to use as the backing file in @top's overlay
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockCompletionFunc *cb,
+ void *opaque, const char *backing_file_str, Error **errp);
+/**
+ * commit_active_start:
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
+ int64_t speed,
+ BlockdevOnError on_error,
+ BlockCompletionFunc *cb,
+ void *opaque, Error **errp);
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ * only be used when full mirroring is selected.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @bs until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ const char *replaces,
+ int64_t speed, uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ bool unmap,
+ BlockCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockCompletionFunc *cb, void *opaque,
+ Error **errp);
+
+void blk_dev_change_media_cb(BlockBackend *blk, bool load);
+bool blk_dev_has_removable_media(BlockBackend *blk);
+void blk_dev_eject_request(BlockBackend *blk, bool force);
+bool blk_dev_is_tray_open(BlockBackend *blk);
+bool blk_dev_is_medium_locked(BlockBackend *blk);
+void blk_dev_resize_cb(BlockBackend *blk);
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+
+#endif /* BLOCK_INT_H */
diff --git a/qemu/include/block/blockjob.h b/qemu/include/block/blockjob.h
new file mode 100644
index 000000000..dd9d5e6aa
--- /dev/null
+++ b/qemu/include/block/blockjob.h
@@ -0,0 +1,359 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block/block.h"
+
+/**
+ * BlockJobDriver:
+ *
+ * A class type for block job driver.
+ */
+typedef struct BlockJobDriver {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ BlockJobType job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+ /** Optional callback for job types that need to forward I/O status reset */
+ void (*iostatus_reset)(BlockJob *job);
+
+ /**
+ * Optional callback for job types whose completion must be triggered
+ * manually.
+ */
+ void (*complete)(BlockJob *job, Error **errp);
+} BlockJobDriver;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobDriver *driver;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #aio_poll will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Counter for pause request. If non-zero, the block job is either paused,
+ * or if busy == true will pause itself as soon as possible.
+ */
+ int pause_count;
+
+ /**
+ * Set to true if the job is paused by user. Can be unpaused with the
+ * block-job-resume QMP command.
+ */
+ bool user_paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #aio_poll, such as a timer.
+ */
+ bool busy;
+
+ /**
+ * Set to true when the job is ready to be completed.
+ */
+ bool ready;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockCompletionFunc *cb;
+
+ /** Block other operations when block job is running */
+ Error *blocker;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
+ int64_t speed, BlockCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
+
+/**
+ * block_job_yield:
+ * @job: The job that calls the function.
+ *
+ * Yield the block job coroutine.
+ */
+void block_job_yield(BlockJob *job);
+
+/**
+ * block_job_release:
+ * @bs: The block device.
+ *
+ * Release job resources when an error occurred or job completed.
+ */
+void block_job_release(BlockDriverState *bs);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job. Must be paired with a preceding block_job_pause.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * block_job_enter:
+ * @job: The job to enter.
+ *
+ * Continue the specified job by entering the coroutine.
+ */
+void block_job_enter(BlockJob *job);
+
+/**
+ * block_job_event_cancelled:
+ * @job: The job whose information is requested.
+ *
+ * Send a BLOCK_JOB_CANCELLED event for the specified job.
+ */
+void block_job_event_cancelled(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ * @msg: Error message. Only present on failure.
+ *
+ * Send a BLOCK_JOB_COMPLETED event for the specified job.
+ */
+void block_job_event_completed(BlockJob *job, const char *msg);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_event_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_complete_sync:
+ * @job: The job to be completed.
+ * @errp: Error object which may be set by block_job_complete(); this is not
+ * necessarily set on every error, the job return value has to be
+ * checked as well.
+ *
+ * Synchronously complete the job. The completion callback is called before the
+ * function returns, unless it is NULL (which is permissible when using this
+ * function).
+ *
+ * Returns the return value from the job.
+ */
+int block_job_complete_sync(BlockJob *job, Error **errp);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+
+typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque);
+
+/**
+ * block_job_defer_to_main_loop:
+ * @job: The job
+ * @fn: The function to run in the main loop
+ * @opaque: The opaque value that is passed to @fn
+ *
+ * Execute a given function in the main loop with the BlockDriverState
+ * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and
+ * anything that uses bdrv_drain_all() in the main loop.
+ *
+ * The @job AioContext is held while @fn executes.
+ */
+void block_job_defer_to_main_loop(BlockJob *job,
+ BlockJobDeferToMainLoopFn *fn,
+ void *opaque);
+
+#endif
diff --git a/qemu/include/block/coroutine.h b/qemu/include/block/coroutine.h
new file mode 100644
index 000000000..20c027a7f
--- /dev/null
+++ b/qemu/include/block/coroutine.h
@@ -0,0 +1,219 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu/typedefs.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
+ */
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
+
+/**
+ * Enter the next coroutine in the queue
+ */
+bool qemu_co_enter_next(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+typedef struct CoRwlock {
+ bool writer;
+ int reader;
+ CoQueue queue;
+} CoRwlock;
+
+/**
+ * Initialises a CoRwlock. This must be called before any other operation
+ * is used on the CoRwlock
+ */
+void qemu_co_rwlock_init(CoRwlock *lock);
+
+/**
+ * Read locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel writer, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_rdlock(CoRwlock *lock);
+
+/**
+ * Write Locks the mutex. If the lock cannot be taken immediately because
+ * of a parallel reader, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_wrlock(CoRwlock *lock);
+
+/**
+ * Unlocks the read/write lock and schedules the next coroutine that was
+ * waiting for this lock to be run.
+ */
+void qemu_co_rwlock_unlock(CoRwlock *lock);
+
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
+ * resumed when using aio_poll().
+ */
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+ int64_t ns);
+
+/**
+ * Yield until a file descriptor becomes readable
+ *
+ * Note that this function clobbers the handlers for the file descriptor.
+ */
+void coroutine_fn yield_until_fd_readable(int fd);
+
+#endif /* QEMU_COROUTINE_H */
diff --git a/qemu/include/block/coroutine_int.h b/qemu/include/block/coroutine_int.h
new file mode 100644
index 000000000..9aa1aae5d
--- /dev/null
+++ b/qemu/include/block/coroutine_int.h
@@ -0,0 +1,54 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+ COROUTINE_ENTER = 3,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QSLIST_ENTRY(Coroutine) pool_next;
+
+ /* Coroutines that should be woken up when we yield or terminate */
+ QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
+
+#endif
diff --git a/qemu/include/block/nbd.h b/qemu/include/block/nbd.h
new file mode 100644
index 000000000..65f409d80
--- /dev/null
+++ b/qemu/include/block/nbd.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
+ *
+ * Network Block Device
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NBD_H
+#define NBD_H
+
+#include <sys/types.h>
+
+#include "qemu-common.h"
+#include "qemu/option.h"
+
+struct nbd_request {
+ uint32_t magic;
+ uint32_t type;
+ uint64_t handle;
+ uint64_t from;
+ uint32_t len;
+} QEMU_PACKED;
+
+struct nbd_reply {
+ uint32_t magic;
+ uint32_t error;
+ uint64_t handle;
+} QEMU_PACKED;
+
+#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
+#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
+#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */
+#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
+#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
+#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
+
+/* New-style global flags. */
+#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+
+/* New-style client flags. */
+#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
+
+/* Reply types. */
+#define NBD_REP_ACK (1) /* Data sending finished. */
+#define NBD_REP_SERVER (2) /* Export description. */
+#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */
+#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
+
+#define NBD_CMD_MASK_COMMAND 0x0000ffff
+#define NBD_CMD_FLAG_FUA (1 << 16)
+
+enum {
+ NBD_CMD_READ = 0,
+ NBD_CMD_WRITE = 1,
+ NBD_CMD_DISC = 2,
+ NBD_CMD_FLUSH = 3,
+ NBD_CMD_TRIM = 4
+};
+
+#define NBD_DEFAULT_PORT 10809
+
+/* Maximum size of a single READ/WRITE data buffer */
+#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
+
+ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
+int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
+ off_t *size, Error **errp);
+int nbd_init(int fd, int csock, uint32_t flags, off_t size);
+ssize_t nbd_send_request(int csock, struct nbd_request *request);
+ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply);
+int nbd_client(int fd);
+int nbd_disconnect(int fd);
+
+typedef struct NBDExport NBDExport;
+typedef struct NBDClient NBDClient;
+
+NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
+ uint32_t nbdflags, void (*close)(NBDExport *),
+ Error **errp);
+void nbd_export_close(NBDExport *exp);
+void nbd_export_get(NBDExport *exp);
+void nbd_export_put(NBDExport *exp);
+
+BlockBackend *nbd_export_get_blockdev(NBDExport *exp);
+
+NBDExport *nbd_export_find(const char *name);
+void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_close_all(void);
+
+NBDClient *nbd_client_new(NBDExport *exp, int csock,
+ void (*close)(NBDClient *));
+void nbd_client_get(NBDClient *client);
+void nbd_client_put(NBDClient *client);
+
+#endif
diff --git a/qemu/include/block/qapi.h b/qemu/include/block/qapi.h
new file mode 100644
index 000000000..327549d91
--- /dev/null
+++ b/qemu/include/block/qapi.h
@@ -0,0 +1,46 @@
+/*
+ * Block layer qmp and info dump related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_QAPI_H
+#define BLOCK_QAPI_H
+
+#include "qapi-types.h"
+#include "block/block.h"
+#include "block/snapshot.h"
+
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp);
+int bdrv_query_snapshot_info_list(BlockDriverState *bs,
+ SnapshotInfoList **p_list,
+ Error **errp);
+void bdrv_query_image_info(BlockDriverState *bs,
+ ImageInfo **p_info,
+ Error **errp);
+
+void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
+ QEMUSnapshotInfo *sn);
+void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
+ ImageInfoSpecific *info_spec);
+void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
+ ImageInfo *info);
+#endif
diff --git a/qemu/include/block/scsi.h b/qemu/include/block/scsi.h
new file mode 100644
index 000000000..edde960d1
--- /dev/null
+++ b/qemu/include/block/scsi.h
@@ -0,0 +1,309 @@
+/* Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * This header file contains public constants and structures used by
+ * the scsi code for linux.
+ */
+#ifndef HW_SCSI_DEFS_H
+#define HW_SCSI_DEFS_H 1
+
+/*
+ * SCSI opcodes
+ */
+
+#define TEST_UNIT_READY 0x00
+#define REWIND 0x01
+#define REQUEST_SENSE 0x03
+#define FORMAT_UNIT 0x04
+#define READ_BLOCK_LIMITS 0x05
+#define INITIALIZE_ELEMENT_STATUS 0x07
+#define REASSIGN_BLOCKS 0x07
+#define READ_6 0x08
+#define WRITE_6 0x0a
+#define SET_CAPACITY 0x0b
+#define READ_REVERSE 0x0f
+#define WRITE_FILEMARKS 0x10
+#define SPACE 0x11
+#define INQUIRY 0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT 0x15
+#define RESERVE 0x16
+#define RELEASE 0x17
+#define COPY 0x18
+#define ERASE 0x19
+#define MODE_SENSE 0x1a
+#define LOAD_UNLOAD 0x1b
+#define START_STOP 0x1b
+#define RECEIVE_DIAGNOSTIC 0x1c
+#define SEND_DIAGNOSTIC 0x1d
+#define ALLOW_MEDIUM_REMOVAL 0x1e
+#define READ_CAPACITY_10 0x25
+#define READ_10 0x28
+#define WRITE_10 0x2a
+#define SEEK_10 0x2b
+#define LOCATE_10 0x2b
+#define POSITION_TO_ELEMENT 0x2b
+#define WRITE_VERIFY_10 0x2e
+#define VERIFY_10 0x2f
+#define SEARCH_HIGH 0x30
+#define SEARCH_EQUAL 0x31
+#define SEARCH_LOW 0x32
+#define SET_LIMITS 0x33
+#define PRE_FETCH 0x34
+#define READ_POSITION 0x34
+#define SYNCHRONIZE_CACHE 0x35
+#define LOCK_UNLOCK_CACHE 0x36
+#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37
+#define READ_DEFECT_DATA 0x37
+#define MEDIUM_SCAN 0x38
+#define COMPARE 0x39
+#define COPY_VERIFY 0x3a
+#define WRITE_BUFFER 0x3b
+#define READ_BUFFER 0x3c
+#define UPDATE_BLOCK 0x3d
+#define READ_LONG_10 0x3e
+#define WRITE_LONG_10 0x3f
+#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME_10 0x41
+#define UNMAP 0x42
+#define READ_TOC 0x43
+#define REPORT_DENSITY_SUPPORT 0x44
+#define GET_CONFIGURATION 0x46
+#define SANITIZE 0x48
+#define GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define LOG_SELECT 0x4c
+#define LOG_SENSE 0x4d
+#define READ_DISC_INFORMATION 0x51
+#define RESERVE_TRACK 0x53
+#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
+#define MODE_SENSE_10 0x5a
+#define SEND_CUE_SHEET 0x5d
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define VARLENGTH_CDB 0x7f
+#define WRITE_FILEMARKS_16 0x80
+#define READ_REVERSE_16 0x81
+#define ALLOW_OVERWRITE 0x82
+#define EXTENDED_COPY 0x83
+#define ATA_PASSTHROUGH_16 0x85
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
+#define READ_16 0x88
+#define COMPARE_AND_WRITE 0x89
+#define WRITE_16 0x8a
+#define WRITE_VERIFY_16 0x8e
+#define VERIFY_16 0x8f
+#define PRE_FETCH_16 0x90
+#define SPACE_16 0x91
+#define SYNCHRONIZE_CACHE_16 0x91
+#define LOCATE_16 0x92
+#define WRITE_SAME_16 0x93
+#define ERASE_16 0x93
+#define SERVICE_ACTION_IN_16 0x9e
+#define WRITE_LONG_16 0x9f
+#define REPORT_LUNS 0xa0
+#define ATA_PASSTHROUGH_12 0xa1
+#define MAINTENANCE_IN 0xa3
+#define MAINTENANCE_OUT 0xa4
+#define MOVE_MEDIUM 0xa5
+#define EXCHANGE_MEDIUM 0xa6
+#define SET_READ_AHEAD 0xa7
+#define READ_12 0xa8
+#define WRITE_12 0xaa
+#define SERVICE_ACTION_IN_12 0xab
+#define ERASE_12 0xac
+#define READ_DVD_STRUCTURE 0xad
+#define WRITE_VERIFY_12 0xae
+#define VERIFY_12 0xaf
+#define SEARCH_HIGH_12 0xb0
+#define SEARCH_EQUAL_12 0xb1
+#define SEARCH_LOW_12 0xb2
+#define READ_ELEMENT_STATUS 0xb8
+#define SEND_VOLUME_TAG 0xb6
+#define READ_DEFECT_DATA_12 0xb7
+#define SET_CD_SPEED 0xbb
+#define MECHANISM_STATUS 0xbd
+#define READ_CD 0xbe
+#define SEND_DVD_STRUCTURE 0xbf
+
+const char *scsi_command_name(uint8_t cmd);
+
+/*
+ * SERVICE ACTION IN subcodes
+ */
+#define SAI_READ_CAPACITY_16 0x10
+
+/*
+ * READ POSITION service action codes
+ */
+#define SHORT_FORM_BLOCK_ID 0x00
+#define SHORT_FORM_VENDOR_SPECIFIC 0x01
+#define LONG_FORM 0x06
+#define EXTENDED_FORM 0x08
+
+/*
+ * SAM Status codes
+ */
+
+#define GOOD 0x00
+#define CHECK_CONDITION 0x02
+#define CONDITION_GOOD 0x04
+#define BUSY 0x08
+#define INTERMEDIATE_GOOD 0x10
+#define INTERMEDIATE_C_GOOD 0x14
+#define RESERVATION_CONFLICT 0x18
+#define COMMAND_TERMINATED 0x22
+#define TASK_SET_FULL 0x28
+#define ACA_ACTIVE 0x30
+#define TASK_ABORTED 0x40
+
+#define STATUS_MASK 0x3e
+
+/*
+ * SENSE KEYS
+ */
+
+#define NO_SENSE 0x00
+#define RECOVERED_ERROR 0x01
+#define NOT_READY 0x02
+#define MEDIUM_ERROR 0x03
+#define HARDWARE_ERROR 0x04
+#define ILLEGAL_REQUEST 0x05
+#define UNIT_ATTENTION 0x06
+#define DATA_PROTECT 0x07
+#define BLANK_CHECK 0x08
+#define COPY_ABORTED 0x0a
+#define ABORTED_COMMAND 0x0b
+#define VOLUME_OVERFLOW 0x0d
+#define MISCOMPARE 0x0e
+
+
+/*
+ * DEVICE TYPES
+ */
+
+#define TYPE_DISK 0x00
+#define TYPE_TAPE 0x01
+#define TYPE_PRINTER 0x02
+#define TYPE_PROCESSOR 0x03 /* HP scanners use this */
+#define TYPE_WORM 0x04 /* Treated as ROM by our system */
+#define TYPE_ROM 0x05
+#define TYPE_SCANNER 0x06
+#define TYPE_MOD 0x07 /* Magneto-optical disk -
+ * - treated as TYPE_DISK */
+#define TYPE_MEDIUM_CHANGER 0x08
+#define TYPE_STORAGE_ARRAY 0x0c /* Storage array device */
+#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */
+#define TYPE_RBC 0x0e /* Simplified Direct-Access Device */
+#define TYPE_OSD 0x11 /* Object-storage Device */
+#define TYPE_WLUN 0x1e /* Well known LUN */
+#define TYPE_NOT_PRESENT 0x1f
+#define TYPE_INACTIVE 0x20
+#define TYPE_NO_LUN 0x7f
+
+/* Mode page codes for mode sense/set */
+#define MODE_PAGE_R_W_ERROR 0x01
+#define MODE_PAGE_HD_GEOMETRY 0x04
+#define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY 0x05
+#define MODE_PAGE_CACHING 0x08
+#define MODE_PAGE_AUDIO_CTL 0x0e
+#define MODE_PAGE_POWER 0x1a
+#define MODE_PAGE_FAULT_FAIL 0x1c
+#define MODE_PAGE_TO_PROTECT 0x1d
+#define MODE_PAGE_CAPABILITIES 0x2a
+#define MODE_PAGE_ALLS 0x3f
+/* Not in Mt. Fuji, but in ATAPI 2.6 -- depricated now in favor
+ * of MODE_PAGE_SENSE_POWER */
+#define MODE_PAGE_CDROM 0x0d
+
+/* Event notification classes for GET EVENT STATUS NOTIFICATION */
+#define GESN_NO_EVENTS 0
+#define GESN_OPERATIONAL_CHANGE 1
+#define GESN_POWER_MANAGEMENT 2
+#define GESN_EXTERNAL_REQUEST 3
+#define GESN_MEDIA 4
+#define GESN_MULTIPLE_HOSTS 5
+#define GESN_DEVICE_BUSY 6
+
+/* Event codes for MEDIA event status notification */
+#define MEC_NO_CHANGE 0
+#define MEC_EJECT_REQUESTED 1
+#define MEC_NEW_MEDIA 2
+#define MEC_MEDIA_REMOVAL 3 /* only for media changers */
+#define MEC_MEDIA_CHANGED 4 /* only for media changers */
+#define MEC_BG_FORMAT_COMPLETED 5 /* MRW or DVD+RW b/g format completed */
+#define MEC_BG_FORMAT_RESTARTED 6 /* MRW or DVD+RW b/g format restarted */
+
+#define MS_TRAY_OPEN 1
+#define MS_MEDIA_PRESENT 2
+
+/*
+ * Based on values from <linux/cdrom.h> but extending CD_MINS
+ * to the maximum common size allowed by the Orange's Book ATIP
+ *
+ * 90 and 99 min CDs are also available but using them as the
+ * upper limit reduces the effectiveness of the heuristic to
+ * detect DVDs burned to less than 25% of their maximum capacity
+ */
+
+/* Some generally useful CD-ROM information */
+#define CD_MINS 80 /* max. minutes per CD */
+#define CD_SECS 60 /* seconds per minute */
+#define CD_FRAMES 75 /* frames per second */
+#define CD_FRAMESIZE 2048 /* bytes per frame, "cooked" mode */
+#define CD_MAX_BYTES (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE)
+#define CD_MAX_SECTORS (CD_MAX_BYTES / 512)
+
+/*
+ * The MMC values are not IDE specific and might need to be moved
+ * to a common header if they are also needed for the SCSI emulation
+ */
+
+/* Profile list from MMC-6 revision 1 table 91 */
+#define MMC_PROFILE_NONE 0x0000
+#define MMC_PROFILE_CD_ROM 0x0008
+#define MMC_PROFILE_CD_R 0x0009
+#define MMC_PROFILE_CD_RW 0x000A
+#define MMC_PROFILE_DVD_ROM 0x0010
+#define MMC_PROFILE_DVD_R_SR 0x0011
+#define MMC_PROFILE_DVD_RAM 0x0012
+#define MMC_PROFILE_DVD_RW_RO 0x0013
+#define MMC_PROFILE_DVD_RW_SR 0x0014
+#define MMC_PROFILE_DVD_R_DL_SR 0x0015
+#define MMC_PROFILE_DVD_R_DL_JR 0x0016
+#define MMC_PROFILE_DVD_RW_DL 0x0017
+#define MMC_PROFILE_DVD_DDR 0x0018
+#define MMC_PROFILE_DVD_PLUS_RW 0x001A
+#define MMC_PROFILE_DVD_PLUS_R 0x001B
+#define MMC_PROFILE_DVD_PLUS_RW_DL 0x002A
+#define MMC_PROFILE_DVD_PLUS_R_DL 0x002B
+#define MMC_PROFILE_BD_ROM 0x0040
+#define MMC_PROFILE_BD_R_SRM 0x0041
+#define MMC_PROFILE_BD_R_RRM 0x0042
+#define MMC_PROFILE_BD_RE 0x0043
+#define MMC_PROFILE_HDDVD_ROM 0x0050
+#define MMC_PROFILE_HDDVD_R 0x0051
+#define MMC_PROFILE_HDDVD_RAM 0x0052
+#define MMC_PROFILE_HDDVD_RW 0x0053
+#define MMC_PROFILE_HDDVD_R_DL 0x0058
+#define MMC_PROFILE_HDDVD_RW_DL 0x005A
+#define MMC_PROFILE_INVALID 0xFFFF
+
+#endif
diff --git a/qemu/include/block/snapshot.h b/qemu/include/block/snapshot.h
new file mode 100644
index 000000000..770d9bbc8
--- /dev/null
+++ b/qemu/include/block/snapshot.h
@@ -0,0 +1,78 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SNAPSHOT_H
+#define SNAPSHOT_H
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/option.h"
+
+
+#define SNAPSHOT_OPT_BASE "snapshot."
+#define SNAPSHOT_OPT_ID "snapshot.id"
+#define SNAPSHOT_OPT_NAME "snapshot.name"
+
+extern QemuOptsList internal_snapshot_opts;
+
+typedef struct QEMUSnapshotInfo {
+ char id_str[128]; /* unique snapshot id */
+ /* the following fields are informative. They are not needed for
+ the consistency of the snapshot */
+ char name[256]; /* user chosen name */
+ uint64_t vm_state_size; /* VM state info size */
+ uint32_t date_sec; /* UTC date of the snapshot */
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name);
+bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
+ const char *id,
+ const char *name,
+ QEMUSnapshotInfo *sn_info,
+ Error **errp);
+int bdrv_can_snapshot(BlockDriverState *bs);
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp);
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp);
+#endif
diff --git a/qemu/include/block/thread-pool.h b/qemu/include/block/thread-pool.h
new file mode 100644
index 000000000..42eb5e842
--- /dev/null
+++ b/qemu/include/block/thread-pool.h
@@ -0,0 +1,37 @@
+/*
+ * QEMU block layer thread pool
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef QEMU_THREAD_POOL_H
+#define QEMU_THREAD_POOL_H 1
+
+#include "block/block.h"
+
+typedef int ThreadPoolFunc(void *opaque);
+
+typedef struct ThreadPool ThreadPool;
+
+ThreadPool *thread_pool_new(struct AioContext *ctx);
+void thread_pool_free(ThreadPool *pool);
+
+BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
+ ThreadPoolFunc *func, void *arg,
+ BlockCompletionFunc *cb, void *opaque);
+int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
+ ThreadPoolFunc *func, void *arg);
+void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
+
+#endif
diff --git a/qemu/include/block/throttle-groups.h b/qemu/include/block/throttle-groups.h
new file mode 100644
index 000000000..fab113f6d
--- /dev/null
+++ b/qemu/include/block/throttle-groups.h
@@ -0,0 +1,46 @@
+/*
+ * QEMU block throttling group infrastructure
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ * BenoƮt Canet <benoit.canet@nodalink.com>
+ * Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef THROTTLE_GROUPS_H
+#define THROTTLE_GROUPS_H
+
+#include "qemu/throttle.h"
+#include "block/block_int.h"
+
+const char *throttle_group_get_name(BlockDriverState *bs);
+
+void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
+void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
+
+void throttle_group_register_bs(BlockDriverState *bs, const char *groupname);
+void throttle_group_unregister_bs(BlockDriverState *bs);
+
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
+ unsigned int bytes,
+ bool is_write);
+
+void throttle_group_lock(BlockDriverState *bs);
+void throttle_group_unlock(BlockDriverState *bs);
+
+#endif
diff --git a/qemu/include/block/write-threshold.h b/qemu/include/block/write-threshold.h
new file mode 100644
index 000000000..f1b899cd5
--- /dev/null
+++ b/qemu/include/block/write-threshold.h
@@ -0,0 +1,64 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef BLOCK_WRITE_THRESHOLD_H
+#define BLOCK_WRITE_THRESHOLD_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+/*
+ * bdrv_write_threshold_set:
+ *
+ * Set the write threshold for block devices, in bytes.
+ * Notify when a write exceeds the threshold, meaning the device
+ * is becoming full, so it can be transparently resized.
+ * To be used with thin-provisioned block devices.
+ *
+ * Use threshold_bytes == 0 to disable.
+ */
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes);
+
+/*
+ * bdrv_write_threshold_get
+ *
+ * Get the configured write threshold, in bytes.
+ * Zero means no threshold configured.
+ */
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_is_set
+ *
+ * Tell if a write threshold is set for a given BDS.
+ */
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs);
+
+/*
+ * bdrv_write_threshold_exceeded
+ *
+ * Return the extent of a write request that exceeded the threshold,
+ * or zero if the request is below the threshold.
+ * Return zero also if the threshold was not set.
+ *
+ * NOTE: here we assume the following holds for each request this code
+ * deals with:
+ *
+ * assert((req->offset + req->bytes) <= UINT64_MAX)
+ *
+ * Please not there is *not* an actual C assert().
+ */
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req);
+
+#endif